In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics

In [None]:
# Load the data from a CSV file (path is relative to the working directory)
# into a DataFrame that the rest of the notebook reads from.
dataset = pd.read_csv("export_dataframe.csv")

In [None]:
# Display pandas' summary statistics for the loaded DataFrame.
dataset.describe()

In [None]:
# Preview the first rows of the loaded DataFrame.
dataset.head()

In [None]:
# Summary statistics for the 'response' column, which is used as the
# prediction target (y) further down.
dataset["response"].describe()

In [None]:
# Target: the column the models below are trained to predict.
y = dataset['response']

# Features: every column except the target and 'message', keeping only the
# columns that contain no missing values. Built as one chained expression so
# no frame is mutated in place and the cell can be re-run safely.
X = dataset.drop(columns=['response', 'message']).dropna(axis="columns")

In [None]:
# Preview the first 10 rows of the feature matrix after dropping columns.
X.head(10)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.33,
)

In [None]:
def get_model_performance(algorithm: str, y_test, y_pred) -> None:
    """Print regression error metrics and an actual-vs-predicted table.

    Args:
        algorithm: Display name of the model; used as a prefix on each line.
        y_test: True target values for the evaluation rows.
        y_pred: Model predictions, in the same row order as ``y_test``.

    Returns:
        None. Everything is written to stdout.
    """
    print(f"--- {algorithm} Results ---")

    # Compute each metric once; the MSE is reused for the RMSE line instead
    # of being recomputed.
    mean_abs_err = metrics.mean_absolute_error(y_test, y_pred)
    mean_sq_err = metrics.mean_squared_error(y_test, y_pred)

    print(f"{algorithm} MAE:", mean_abs_err)
    print(f"{algorithm} MSE:", mean_sq_err)
    print(f"{algorithm} RMSE:", np.sqrt(mean_sq_err))

    print("\n")

    # Side-by-side table of true values and predictions.
    print(pd.DataFrame({'Actual': y_test, 'Predicted': y_pred}))

In [None]:
# decision tree
from sklearn.tree import DecisionTreeRegressor

# Seed the tree so the fitted model and its metrics are reproducible between
# runs, consistent with the random_state=0 used for the train/test split and
# the random forest in this notebook.
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train, y_train)

# Predictions on the held-out rows, evaluated in the next cell.
y_pred = regressor.predict(X_test)

In [None]:
# Report error metrics for the predictions currently held in y_pred.
get_model_performance("Decision Tree", y_test, y_pred)

In [None]:
# random forest
from sklearn.ensemble import RandomForestRegressor

# 100 trees with a fixed seed. fit() returns the estimator itself, so the
# fitted model is bound to `regressor` in a single statement.
regressor = RandomForestRegressor(random_state=0, n_estimators=100).fit(X_train, y_train)

# Overwrites the previous model's predictions with this model's.
y_pred = regressor.predict(X_test)

In [None]:
# Report error metrics for the predictions currently held in y_pred.
get_model_performance("Random Forest", y_test, y_pred)

In [None]:
# linear regression
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construct and train in one step.
regressor = LinearRegression().fit(X_train, y_train)

# Show the fitted parameters: the intercept first, then the coefficient array,
# each on its own line.
print(regressor.intercept_, regressor.coef_, sep="\n")

# Overwrites the previous model's predictions with this model's.
y_pred = regressor.predict(X_test)

In [None]:
# Report error metrics for the predictions currently held in y_pred.
get_model_performance("Linear Regression", y_test, y_pred)

In [None]:
# Column names of the full DataFrame. Note this is taken from `dataset`, not
# from `X`, so it still contains 'response' and 'message' and any columns that
# were removed from X by dropna.
features = list(dataset.columns) 
features

In [None]:
# visualize feature weights
from xgboost import XGBClassifier

# NOTE(review): every other model in this notebook is a regressor and is
# scored with MAE/MSE; if `response` is continuous, XGBRegressor is probably
# the intended estimator here — confirm before relying on these importances.
model = XGBClassifier(n_estimators=100)
model.fit(X, y)

# One importance value per column of X (the frame the model was fit on).
feature_importance = model.feature_importances_

# Tick labels must come from X, not from `dataset`: dataset.columns still
# contains 'response', 'message' and any columns removed by dropna, so it has
# more entries than there are bars and would mislabel (or fail to label) them.
features = list(X.columns)
bar_positions = range(len(feature_importance))

plt.figure(figsize=(20, 8))
# `nonposy` was deprecated in Matplotlib 3.3 and later removed; `nonpositive`
# is the current keyword for clipping non-positive values on a log axis.
plt.yscale("log", nonpositive="clip")

plt.bar(bar_positions, feature_importance, align="center")
plt.xticks(bar_positions, features, rotation="vertical")
plt.title("Feature Importance")
plt.ylabel("Importance")
plt.xlabel("Features")
plt.show()