## Create Dataframe

In [None]:
import pandas as pd
import numpy as np

# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# Load the semicolon-delimited, UTF-8 encoded dataset into a DataFrame.
df = pd.read_csv("insider_book.csv", sep=";", encoding="utf-8")
# Preview the first rows.
df.head()

In [None]:
# Inspect the column labels that were read from the CSV.
df.columns

In [None]:
# (rows, columns) of the frame before any columns are dropped.
df.shape

#### Remove unnecessary features

In [None]:
# Columns removed before modelling. The labels are matched exactly, so the
# surrounding spaces in some names are significant.
unused_columns = [
    "Industry",
    "Filing Date",
    "date 1y before",
    " unratechange ",
    "better date before",
    "firstofmonth",
    "date 1y after",
    "date 1y after better",
    "Period",
    "Quarter",
    "ticker",
    "Company Name",
    "1y before price",
    " 1yafterpricechange ",
]
df.drop(columns=unused_columns, inplace=True)
df.head()

#### Transform values to correct types

In [None]:
# Inspect the dtypes of the remaining columns before they are cleaned.
df.dtypes

In [None]:
# Cast every column to str so the `.str` accessor calls in the following
# cells can be applied uniformly to all columns.
df = df.astype(str)

In [None]:
# Prepare the text for numeric parsing: use "." as the decimal separator and
# strip the "%" and "$" symbols. No type conversion happens here.
# regex=False is passed explicitly because "$" is a regex metacharacter
# (end-of-string anchor) and the default of `regex` differs between
# pandas 1.x (True) and pandas 2.x (False).
df = df.apply(
    lambda col: col.str.replace(",", ".", regex=False)
    .str.replace("%", "", regex=False)
    .str.replace("$", "", regex=False)
)

df.head()

In [None]:
import numpy as np

# Trim surrounding whitespace from every string cell. This is done column-wise
# with Series.map because DataFrame.applymap is deprecated since pandas 2.1.
df = df.apply(
    lambda col: col.map(lambda v: v.strip() if isinstance(v, str) else v)
)

# Remove the second dot from "digits.digits." runs, then drop any remaining
# commas. regex=True is required: since pandas 2.0 the default is a literal
# match, which would silently leave the pattern unapplied.
# NOTE(review): a thousands-separated value such as "1.234.56" becomes
# "1.23456" here -- confirm this is the intended result for this dataset.
df = df.apply(
    lambda col: col.str.replace(r'(\d+)\.(\d+)\.', r'\1.\2', regex=True)
    .str.replace(',', '', regex=False)
)

df.head(15)

In [None]:
# Convert every column to float where possible.
# The "-" placeholder is mapped to NaN *before* the cast, so a numeric column
# that only contains "-" for missing values still becomes float. np.nan is
# used rather than pd.NaT, which is the missing-value marker for datetimes.
for col in df.columns:
    cleaned = df[col].replace('-', np.nan)
    try:
        df[col] = cleaned.astype(float)
    except (ValueError, TypeError):
        # Genuinely non-numeric column: keep it as-is, with "-" as NaN.
        df[col] = cleaned

In [None]:
# Check which columns ended up as float after the conversion loop.
print(df.dtypes)

In [None]:
# Binarize the target: 1 when the value is below -50, otherwise 0.
# Missing values also become 0 because a NaN comparison evaluates to False.
is_below_threshold = df["target"].lt(-50)
df["target"] = is_below_threshold.astype(int)

df.head()

In [None]:
# (rows, columns) after dropping columns and binarizing the target.
df.shape

#### Run AutoML

In [None]:
from supervised.automl import AutoML
from sklearn.metrics import log_loss

# Separate the binary label from the feature columns.
y = df["target"]
X = df.drop(columns="target")

# Fit AutoML with average precision as the evaluation metric; "previous_autoML"
# is passed as the results path.
automl = AutoML(results_path="previous_autoML", eval_metric="average_precision")
automl.fit(X, y)

In [None]:
import pickle

# Serialize the fitted AutoML object to disk with pickle.
with open('short_insiders.pkl', 'wb') as pkl_out:
    pickle.dump(automl, pkl_out)

In [None]:
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Persist the AutoML object that was fitted on the full dataset.
joblib.dump(automl, "short_insider2.pkl")

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# `model` was never defined, so this cell used to fail with NameError.
# `automl` has already been fitted on every row, so scoring it on X_test would
# leak the test data. A fresh AutoML is trained on the training split instead.
# No results_path is passed so the "previous_autoML" directory is not reused
# (assumes mljar-supervised then creates a new results directory -- TODO confirm).
model = AutoML(eval_metric="average_precision")
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the model's score and per-class metrics on the hold-out split.
print(model.score(X_test, y_test))
print(classification_report(y_test, y_pred))