In [1]:
import pandas as pd
from datetime import datetime, timedelta

# Load Data
input_data_file = "../data/user_stories.txt"  # Replace with your file name
# Read every line of the input file into a list. The encoding is pinned to
# UTF-8 so decoding does not depend on the platform's default locale.
with open(input_data_file, "r", encoding="utf-8") as file:
    user_stories = file.readlines()

# Step 1: Extract Relevant Information from User Stories

# Keyword -> label tables. Order matters: the first keyword found as a
# substring of the lower-cased story wins.
_ROLE_KEYWORDS = (
    ("customer", "Customer"),
    ("teacher", "Teacher"),
    ("student", "Student"),
    ("librarian", "Librarian"),
)
_MOSCOW_KEYWORDS = (
    ("must", "Must Have"),
    ("should", "Should Have"),
    ("could", "Could Have"),
)
# Column order of the resulting DataFrame (matches the record keys below).
_STORY_COLUMNS = [
    "user_story_id",
    "user_story_description",
    "stakeholder_role",
    "complexity",
    "moscow_category",
    "dependencies",
]


def _first_keyword_label(text, keyword_labels, default):
    """Return the label of the first keyword contained in *text*, else *default*."""
    for keyword, label in keyword_labels:
        if keyword in text:
            return label
    return default


data = []
for idx, line in enumerate(user_stories):
    description = line.strip()
    if not description:
        # Blank lines are skipped, but they still consume an index, so
        # user_story_id reflects the 1-based line number in the input.
        continue

    # Lower-case once; every keyword test below is a case-insensitive
    # substring match against this copy.
    lowered = description.lower()

    # Stakeholder role from keywords in the story
    role = _first_keyword_label(lowered, _ROLE_KEYWORDS, "General User")

    # Assign complexity based on the number of words in the description
    complexity = len(description.split())

    # Determine MoSCoW category from keywords in the story
    moscow_category = _first_keyword_label(lowered, _MOSCOW_KEYWORDS, "Won’t Have")

    # Flag a dependency when the story mentions one ("depend", "depends", ...)
    dependencies = "Related to another feature" if "depend" in lowered else None

    # Add data to the list
    data.append({
        "user_story_id": idx + 1,
        "user_story_description": description,
        "stakeholder_role": role,
        "complexity": complexity,
        "moscow_category": moscow_category,
        "dependencies": dependencies,
    })

# Convert to DataFrame. Passing the columns explicitly keeps the schema in
# place even when the input contained no non-blank lines.
df = pd.DataFrame(data, columns=_STORY_COLUMNS)

# Step 2: Add Logical Fields Based on Relationships
def calculate_weighted_score(row):
    """Compute a story's weighted score from its MoSCoW category and complexity.

    The score starts at 5, gains a category bonus (Must Have +5,
    Should Have +3, Could Have +1, anything else +0) and one tenth of
    ``row['complexity']``. The total is returned rounded to two decimals.
    """
    category_bonus = {"Must Have": 5, "Should Have": 3, "Could Have": 1}
    total = 5 + category_bonus.get(row['moscow_category'], 0)
    # Complexity contributes a tenth of its value.
    total += row['complexity'] / 10
    return round(total, 2)

# Score each story: axis="columns" hands every row to the scoring function.
df['weighted_score'] = df.apply(calculate_weighted_score, axis="columns")

# Final Priority
def determine_final_priority(row):
    """Translate a row's MoSCoW category into a High/Medium/Low priority.

    "Must Have" maps to "High" and "Should Have" to "Medium"; "Could Have"
    and any other category map to "Low".
    """
    priority_by_category = {"Must Have": "High", "Should Have": "Medium"}
    return priority_by_category.get(row['moscow_category'], "Low")

# Derive each story's priority: axis="columns" hands every row to the mapper.
df['final_priority'] = df.apply(determine_final_priority, axis="columns")

# Assign Project Stage
def assign_project_stage(description):
    """Pick a project stage from keywords in a story description.

    The match is a case-insensitive substring test, checked in the order
    planning, development, testing. The first hit decides the stage, and
    descriptions without any of these keywords default to "Planning".
    """
    text = description.lower()
    stage_keywords = (
        ("planning", "Planning"),
        ("development", "Development"),
        ("testing", "Testing"),
    )
    for keyword, stage in stage_keywords:
        if keyword in text:
            return stage
    return "Planning"

# Derive the project stage from keywords in each story's description.
df['project_stage'] = df['user_story_description'].apply(assign_project_stage)

# Assign Timestamps
# Read the clock once so consecutive rows are exactly one day apart; calling
# datetime.now() per row would add a small drift to every timestamp.
reference_time = datetime.now()
df['timestamp'] = [reference_time - timedelta(days=i) for i in range(len(df))]

# Outcome
# High and Medium priority stories are approved; everything else is deferred.
df['outcome'] = df['final_priority'].apply(
    lambda x: "Approved" if x in ["High", "Medium"] else "Deferred"
)

# Step 3: Save Final Dataset
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv("../data/final_logic_based_dataset.csv", index=False)
print("Final dataset created successfully!")


Final dataset created successfully!
