# Modeling

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, ensemble
from sklearn.linear_model._base import LinearModel
from sklearn.ensemble._forest import RandomForestRegressor, RandomForestClassifier
from typing import List, Callable
from sklearn.metrics import mean_squared_error, root_mean_squared_error, accuracy_score




## Data Loading

Load the training, test, and validation splits from their CSV files.

In [None]:
# Read the three interaction splits. Every file is parsed as a
# comma-separated CSV whose first row holds the column names.
df_train, df_test, df_val = (
    pd.read_csv(csv_path, sep=',', header=0)
    for csv_path in (
        'data/interaction_train_set.csv',
        'data/interaction_test_set.csv',
        'data/interaction_val_set.csv',
    )
)

# Render each split in the notebook output.
display(df_train)
display(df_test)
display(df_val)

## Train Model

In [None]:

# Feature columns passed to the training and evaluation helpers.
x_params = [
    'reciprocity',
    'multiplexity',
    'closeness',
    'sentiment',
    'interactionFrequency',
]
# Target column the models are fitted against.
y_params = "tieStrength"

In [None]:

def train_model(model, train: pd.DataFrame, validation: pd.DataFrame, x: List[str], y: str, metrics: List[Callable]) -> List[dict]:
    """Fit ``model`` on the training split and score it on validation and training data.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place.
        train: Data the model is fitted on.
        validation: Data scored after fitting.
        x: Names of the feature columns.
        y: Name of the target column.
        metrics: Functions called as ``metric(y_true, y_pred)``. Each result
            is stored under the function's ``__name__``.

    Returns:
        Two dicts, validation first and training second, each holding
        ``model_name``, ``set_name`` and one entry per metric.
    """
    train_x = train[x].to_numpy()
    train_y = train[y].to_numpy()
    model.fit(train_x, train_y)

    model_name = model.__class__.__name__
    # Both splits are scored by the same loop; the order here is the order
    # of the returned rows.
    splits = [
        ("validation", validation[x].to_numpy(), validation[y].to_numpy()),
        ("training", train_x, train_y),
    ]

    results = []
    for set_name, features, targets in splits:
        predictions = model.predict(features)
        scores = {"model_name": model_name, "set_name": set_name}
        for metric in metrics:
            scores[metric.__name__] = metric(targets, predictions)
        results.append(scores)
    return results


## Training

In [None]:
# Candidate regressors, each constructed with no arguments.
models = [
    linear_model.LinearRegression(),
    ensemble.RandomForestRegressor(),
    linear_model.Lasso(),
    linear_model.ElasticNet(),
    linear_model.Ridge(),
]
# Error metrics reported for each model.
metrics = [mean_squared_error, root_mean_squared_error]

In [None]:

def train_models(model, train: pd.DataFrame, validation: pd.DataFrame, x: List[str], y: str, metrics: List[Callable]) -> pd.DataFrame:
    """Fit and score every estimator in ``model``, collecting the scores in one table.

    Args:
        model: Iterable of estimators. The singular parameter name is kept
            so existing keyword callers keep working.
        train: Data each estimator is fitted on.
        validation: Data each estimator is scored on after fitting.
        x: Names of the feature columns.
        y: Name of the target column.
        metrics: Functions called as ``metric(y_true, y_pred)``.

    Returns:
        DataFrame built from the rows ``train_model`` returns for each
        estimator, in iteration order.
    """
    results = []
    # Iterate the argument itself rather than a notebook-level global, so the
    # function trains exactly the estimators it was handed.
    for estimator in model:
        results.extend(train_model(estimator, train, validation, x, y, metrics))
    return pd.DataFrame(results)
        

# Train every candidate and keep its validation/training scores.
model_results = train_models(
    model=models,
    train=df_train,
    validation=df_val,
    x=x_params,
    y=y_params,
    metrics=metrics,
)
model_results

## Evaluation

In [None]:
def eval_model(model, test:pd.DataFrame, x:List[str], y:str, metrics:List[Callable]):
    """Score ``model`` on the test split without fitting it.

    Args:
        model: Object exposing ``predict(X)``; only ``predict`` is called.
        test: Data to score on.
        x: Names of the feature columns.
        y: Name of the target column.
        metrics: Functions called as ``metric(y_true, y_pred)``. Each result
            is stored under the function's ``__name__``.

    Returns:
        A one-element list holding a dict with ``model_name``, ``set_name``
        (always ``"test"``) and one entry per metric.
    """
    features = test[x].to_numpy()
    targets = test[y].to_numpy()
    predictions = model.predict(features)

    scores = {
        "model_name": model.__class__.__name__,
        "set_name": "test",
    }
    # Metric entries follow the two identifying keys, in the order given.
    scores.update(
        {metric.__name__: metric(targets, predictions) for metric in metrics}
    )
    return [scores]



In [None]:
def eval_models(model, test: pd.DataFrame, x: List[str], y: str, metrics: List[Callable]) -> pd.DataFrame:
    """Score every estimator in ``model`` on the test split, collecting the scores in one table.

    Args:
        model: Iterable of estimators that can already ``predict``. The
            singular parameter name is kept so existing keyword callers keep
            working.
        test: Data to score on.
        x: Names of the feature columns.
        y: Name of the target column.
        metrics: Functions called as ``metric(y_true, y_pred)``.

    Returns:
        DataFrame built from the rows ``eval_model`` returns for each
        estimator, in iteration order.
    """
    results = []
    # Iterate the argument itself rather than a notebook-level global, so the
    # function evaluates exactly the estimators it was handed.
    for estimator in model:
        results.extend(eval_model(estimator, test, x, y, metrics))
    return pd.DataFrame(results)

# Score the candidates on the test split.
eval_results = eval_models(
    model=models,
    test=df_test,
    x=x_params,
    y=y_params,
    metrics=metrics,
)
eval_results
        

In [None]:
# Stack the test scores on top of the validation/training scores and order
# the rows by model name.
results = pd.concat([eval_results, model_results]).sort_values("model_name")
results

## Visualization

In [None]:
# Discard the old row labels: after the concat they are duplicated positions
# from the two source tables, and keeping them would add a numeric "index"
# column that the bar chart would draw as if it were a metric.
results = results.reset_index(drop=True)

In [None]:
# Use the model name as the row label.
results = results.set_index("model_name")


In [None]:
# Show the combined score table, now indexed by model name.
results

In [None]:

# Draw the table as a vertical bar chart, one bar group per row.
results.plot(kind="bar")