In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
import joblib
import warnings
# Load the training set
df = pd.read_csv('sentiAnalysis.csv')  # Replace with the actual path to your training set



In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Define the independent variables (features) and the dependent variable (target)
features = ['emotion', 'Emotionality', 'Priority']  # Replace with your actual feature column names
target = 'Duration_hours'  # Replace with your actual target column name

X = df[features]
y = df[target]

# Define the column transformer with one-hot encoding for categorical variables
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), ['emotion', 'Priority'])
    ],
    remainder='passthrough'
)

# Create a pipeline that includes the preprocessor and the linear regression model
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', LinearRegression())
])

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Train the model using the pipeline
pipeline.fit(X_train, y_train)

# Make predictions on the test set
y_pred = pipeline.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

# Print model coefficients (Note: coefficients correspond to the transformed features)
print("Coefficients:", pipeline.named_steps['model'].coef_)
print("Intercept:", pipeline.named_steps['model'].intercept_)


Mean Squared Error: 183995726.6912031
R-squared: 0.04608675862227052
Coefficients: [-1.25119429e+16 -1.25119429e+16  5.23641058e+16  5.23641058e+16
  5.23641058e+16  5.23641058e+16  5.23641058e+16  3.57304688e+03]
Intercept: -3.9852162962696456e+16


In [10]:
df

Unnamed: 0,Issue_id,Priority,Component,Duplicated_issue,Title,Description,Status,Resolution,Version,Created_time,Resolved_time,Duration_hours,TimeLabel,Pos_Score,Neg_Score,Emotion,Emotionality
0,1,P3,Team,,Usability issue with external editors (1GE6IRL),- setup project contain * .gif resource ; - re...,CLOSED,FIXED,2.0,2001-10-11 01:34:00+00:00,2012-02-09 20:57:47+00:00,90571.383333,long,0.051768,0.065657,negative,0.117424
1,2,P5,Team,,Opening repository resources doesnt honor type...,open repository resource always open default t...,RESOLVED,FIXED,2.0,2001-10-11 01:34:00+00:00,2002-05-07 14:33:56+00:00,5004.983333,short,0.020833,0.056818,negative,0.077652
2,3,P5,Team,,Sync does not indicate deletion (1GIEN83),km ( 10/2/2001 5:55:18 pm ) ; pr deletion indi...,RESOLVED,FIXED,2.0,2001-10-11 01:34:00+00:00,2010-05-07 14:28:53+00:00,75132.900000,long,0.086364,0.036364,positive,0.122727
3,4,P5,Team,,need better error message if catching up over ...,- become synchronize project repository ; - us...,RESOLVED,FIXED,2.0,2001-10-11 01:34:00+00:00,2002-03-01 21:27:31+00:00,3403.883333,short,0.049342,0.092105,negative,0.141447
4,5,P3,Team,,ISharingManager sharing API inconsistent (1GAU...,getting/setting manage state resource ; method...,RESOLVED,WONTFIX,2.0,2001-10-11 01:34:00+00:00,2008-08-15 12:04:36+00:00,60010.500000,long,0.085227,0.005682,positive,0.090909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68119,229777,P3,UI,,[Viewers] Wrong argument in the first statemen...,build id : 3.3 ( i sure mean build id ; 3.3 ec...,RESOLVED,FIXED,3.3,2008-05-01 13:47:00+00:00,2008-05-10 14:06:07+00:00,216.316667,short,0.064024,0.027439,positive,0.091463
68120,229779,P3,UI,,NPE in performance tests,several npes within ui session test prevent ru...,VERIFIED,FIXED,3.4,2008-05-01 13:52:00+00:00,2008-05-20 14:12:14+00:00,456.333333,short,0.033333,0.033333,negative,0.066667
68121,229782,P3,UI,,Performance tests for ICU Collator,i20080501-0100 ; ; use collator ( see dependan...,VERIFIED,FIXED,3.4,2008-05-01 14:05:00+00:00,2009-06-01 18:25:12+00:00,9508.333333,long,0.025000,0.000000,positive,0.025000
68122,229789,P3,UI,,[Examples] examples plugins create duplicate m...,create attachment 98318 ; screenshot ; ; i2008...,VERIFIED,FIXED,3.4,2008-05-01 15:02:00+00:00,2008-05-31 01:57:57+00:00,706.916667,short,0.092105,0.013158,positive,0.105263


In [9]:

# Your DataFrame with input and target columns
# df = pd.read_csv('your_data.csv')  # Replace with your actual DataFrame
df2 = df[df['Resolution'].str.contains('FIXED', na=False)]
input_columns = ['Emotion']
target_column = 'TimeLabel'

# Concatenate input columns to form the feature set
X = df2[input_columns].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
y = df2[target_column]  # Target variable

# Vectorize the text data
vectorizer = TfidfVectorizer(max_features=1000)
X_vec = vectorizer.fit_transform(X)

# Initialize and train the MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', max_iter=100, random_state=42)
model.fit(X_vec, y)

# Save the trained model and vectorizer using joblib
joblib.dump(model, 'MLPmodelFixedTime.joblib')
joblib.dump(vectorizer, 'vectorizer.joblib')

print("Model and vectorizer trained and saved.")


Model and vectorizer trained and saved.


In [10]:
#code for MLPTimeLabel Column
input_columns = ['Emotion']
target_column = 'TimeLabel'

# Concatenate input columns to form the feature set
X = df[input_columns].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
y = df[target_column]  # Target variable

# Vectorize the text data
vectorizer = TfidfVectorizer(max_features=1000)
X_vec = vectorizer.fit_transform(X)

# Initialize and train the MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', max_iter=100, random_state=42)
model.fit(X_vec, y)

# Save the trained model and vectorizer using joblib
joblib.dump(model, 'MLPmodelTimeLabel.joblib')
joblib.dump(vectorizer, 'vectorizer2.joblib')

print("Model and vectorizer trained and saved.")

Model and vectorizer trained and saved.


In [15]:
#code for MLPTimeLabel Column
input_columns = ['Priority', 'Emotion']
target_column = 'TimeLabel'

# Concatenate input columns to form the feature set
X = df[input_columns].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
y = df[target_column]  # Target variable

# Vectorize the text data
vectorizer = TfidfVectorizer(max_features=1000)
X_vec = vectorizer.fit_transform(X)

# Initialize and train the MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', max_iter=100, random_state=42)
model.fit(X_vec, y)

# Save the trained model and vectorizer using joblib
joblib.dump(model, 'MLPmodelTimeLabelPriority.joblib')
joblib.dump(vectorizer, 'vectorizer3.joblib')

print("Model and vectorizer trained and saved.")

Model and vectorizer trained and saved.


In [None]:
#code for MLPTimeLabel Column
input_columns = ['Priority', 'Emotion', 'Emotionality']
target_column = 'TimeLabel'

# Concatenate input columns to form the feature set
X = df[input_columns].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
y = df[target_column]  # Target variable

# Vectorize the text data
vectorizer = TfidfVectorizer(max_features=1000)
X_vec = vectorizer.fit_transform(X)

# Initialize and train the MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', max_iter=100, random_state=42)
model.fit(X_vec, y)

# Save the trained model and vectorizer using joblib
joblib.dump(model, 'MLPmodelTimeLabelPriorityEmotionality.joblib')
joblib.dump(vectorizer, 'vectorizer4.joblib')

print("Model and vectorizer trained and saved.")