In [None]:
import sys
sys.path.append('..')

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

from src.data_collection.linear_api import fetch_issues
from src.data_collection.github_api import fetch_merged_prs
from src.data_processing.preprocessor import preprocess_data
from src.model.predictor import CompletionPredictor

In [None]:
# Pull the two raw inputs: issues via the Linear API helper and merged
# pull requests via the GitHub API helper.
issues = fetch_issues()
prs = fetch_merged_prs()

# Quick sanity check on how much data came back.
fetch_summary = f"Fetched {len(issues)} issues and {len(prs)} pull requests."
print(fetch_summary)

In [None]:
# Combine issues and pull requests into the frame used by every later cell.
df = preprocess_data(issues, prs)

# Preview the first rows, then the summary statistics.
for preview in (df.head(), df.describe()):
    print(preview)

In [None]:
# Scatter plot: estimate against completion time.
fig_scatter, ax_scatter = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='estimate', y='completion_time', ax=ax_scatter)
ax_scatter.set_title('Story Points vs Completion Time')
ax_scatter.set_xlabel('Story Points')
ax_scatter.set_ylabel('Completion Time (days)')
plt.show()

# Box plot: distribution of completion time for each day of the week.
fig_box, ax_box = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='day_of_week', y='completion_time', ax=ax_box)
ax_box.set_title('Completion Time by Day of Week')
ax_box.set_xlabel('Day of Week (0 = Monday, 6 = Sunday)')
ax_box.set_ylabel('Completion Time (days)')
plt.show()

In [None]:
# Columns used as model inputs and as the regression target.
feature_cols = ['estimate', 'day_of_week']
target_col = 'completion_time'

# Full feature matrix / target; `X` is reused by the feature-importance cell.
X = df[feature_cols]
y = df[target_col]

# Split the frame itself (same test_size and random_state as before, so the
# row partition is unchanged) so the training rows can be handed to the
# predictor as a DataFrame.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
X_train, y_train = train_df[feature_cols], train_df[target_col]
X_test, y_test = test_df[feature_cols], test_df[target_col]

# Train on the training rows only. Fitting on all of `df` would let the model
# see the held-out rows and make the metrics below optimistic in-sample
# numbers rather than a real hold-out estimate.
# NOTE(review): assumes CompletionPredictor.train accepts any frame with the
# same columns as `df` -- confirm against src/model/predictor.py.
predictor = CompletionPredictor()
predictor.train(train_df)

# Score the held-out rows with the underlying fitted model.
y_pred = predictor.model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# RMSE is derived from MSE directly; the `squared=False` keyword is deprecated
# in recent scikit-learn releases and no longer accepted by the newest ones.
rmse = mse ** 0.5

print(f"Mean Absolute Error: {mae:.2f} days")
print(f"Mean Squared Error: {mse:.2f} days^2")
print(f"Root Mean Squared Error: {rmse:.2f} days")


In [None]:
# Pair each input column with the importance reported by the fitted model,
# then order the rows from most to least important.
importance_unsorted = pd.DataFrame(
    {'feature': X.columns, 'importance': predictor.model.feature_importances_}
)
feature_importance = importance_unsorted.sort_values('importance', ascending=False)

# Horizontal bar chart of the ranked importances.
fig_importance, ax_importance = plt.subplots(figsize=(10, 6))
sns.barplot(data=feature_importance, x='importance', y='feature', ax=ax_importance)
ax_importance.set_title('Feature Importance')
ax_importance.set_xlabel('Importance')
ax_importance.set_ylabel('Feature')
plt.show()

In [None]:
# Example inputs for a single prediction; edit these to try other tasks.
story_points = 3
day_of_week = 2  # 0 = Monday ... 6 = Sunday, so 2 is Wednesday

# Derive the weekday label from `day_of_week` so the printed message stays
# correct when the value above is changed (it used to be hard-coded).
day_names = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
day_name = day_names[day_of_week]

predicted_days = predictor.predict(story_points, day_of_week)
print(f"A task with {story_points} story points started on day {day_of_week} ({day_name}) is predicted to take {predicted_days:.2f} days.")