In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from math import sqrt
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import precision_score, make_scorer,f1_score
import traceback
import prettytable


In [None]:
# Install a blanket 'ignore' filter so that no warnings are displayed from
# this cell onwards (library warnings included).
import warnings

warnings.simplefilter('ignore')

In [None]:
def pre_process_data(data,null_threshold):
    """
    Cleans a raw dataframe without modifying the frame that was passed in.

    Drops the Date and Unix Date columns.
    Drops the columns whose null count exceeds null_threshold percent of the rows.
    Coerces every remaining column to numeric (unparseable values become NaN).
    Replaces infinite values with NaN.
    Drops the rows which have null values.

    Parameters
    ----------
    data : dataframe
        must contain the 'Unix Date' and 'Date' columns.

    null_threshold : numeric
        maximum percentage (0-100) of null values a column may contain
        before it is dropped.

    Returns
    -------
    data : dataframe
        a new dataframe after performing all the operations.

    Raises
    ------
    KeyError
        if 'Unix Date' or 'Date' is not a column of data.
    """
    # Non in-place drop: the caller's frame keeps its original columns.
    cleaned = data.drop(columns=['Unix Date','Date'])

    # Null counts are taken before infinities are converted, so infinite
    # values do not count towards the column threshold.
    max_nulls = null_threshold * cleaned.shape[0] / 100
    null_counts = cleaned.isnull().sum()
    cleaned = cleaned.loc[:, null_counts <= max_nulls]

    # Coerce first, then strip infinities: text such as "inf" in an object
    # column only turns into a real infinity during the numeric coercion.
    cleaned = cleaned.apply(pd.to_numeric,errors='coerce')
    cleaned = cleaned.replace([np.inf, -np.inf], np.nan)
    return cleaned.dropna(axis=0)

In [None]:
def dependent_column(data,column):
    """
    Keeps only the Growth Rate (GR) feature columns plus the predictor column.

    A column is kept as a feature when its name ends with "gr" and does not
    contain "next" (both checks are case-insensitive). The predictor column is
    then added, unless it is already among the kept columns, so it never
    appears twice in the result.

    Parameters
    ----------
    data : dataframe

    column : string
        name of the predictor column

    Returns
    -------
    data : dataframe
        an independent copy restricted to the selected columns, so that
        columns added to it later do not touch the input dataframe.
    column : string
        name of the predictor column
    """
    cols = [
        name for name in data.columns
        if "next" not in name.lower() and name.lower().endswith("gr")
    ]
    # Appending unconditionally would duplicate a predictor that is itself a
    # non-"next" GR column, and data[column] would then return a dataframe.
    if column not in cols:
        cols.append(column)
    return (data[cols].copy(),column)

In [None]:
def create_confusion_matrix(y_true, y_pred):
    """
    Computes classification scores and the confusion matrix for predictions.

    Parameters
    ----------
    y_true : array-like
        ground-truth labels.

    y_pred : array-like
        predicted labels.

    Returns
    -------
    scores : dict
        keys "accuracy", "precision", "recall", "f1_score" and
        "confusion matrix", in that order.
    """
    matrix = confusion_matrix(y_true,y_pred)
    scores = {
        "accuracy": metrics.accuracy_score(y_true,y_pred),
        "precision": metrics.precision_score(y_true,y_pred),
        "recall": metrics.recall_score(y_true,y_pred),
        "f1_score": metrics.f1_score(y_true,y_pred),
    }
    # Added last so the matrix stays the final key, as callers receive it today.
    scores["confusion matrix"] = matrix
    return scores

In [None]:
def error_metrics(y_true, y_pred):
    """
    Computes error metrics between true and predicted values.

    Parameters
    ----------
    y_true : array-like
        ground-truth values.

    y_pred : array-like
        predicted values.

    Returns
    -------
    errors : dict
        keys "root_mean_squared_error", "mean_absolute_error" and
        "mean_squared_error", in that order.
    """
    # Compute the MSE once and derive the RMSE from it instead of calling
    # mean_squared_error a second time.
    mse = metrics.mean_squared_error(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    return {"root_mean_squared_error":sqrt(mse),"mean_absolute_error":mae,"mean_squared_error":mse}

In [None]:
def create_svm_classification(df,column,C,gamma,kernel,rate):
    """
    Trains an SVC on a binary growth target and scores it on a hold-out split.

    The target is 1 where df[column] >= rate and 0 otherwise. The data is
    split 70/30 with random_state 0. The input dataframe is not modified.

    Parameters
    ----------
    df : dataframe
        feature columns plus the predictor column.

    column : string
        name of the predictor column the binary target is derived from.

    C, gamma, kernel :
        hyper-parameters passed to SVC.

    rate : numeric
        growth threshold at or above which a row is labelled 1.

    Returns
    -------
    result : dict
        the error_metrics entries followed by the create_confusion_matrix
        entries, computed on the test split.
    """
    # Build the label as a separate Series rather than writing a "Target"
    # column into the caller's dataframe.
    target = (df[column] >= rate).astype(int).rename("Target")
    # errors="ignore" only matters for "Target": a stale one left by a caller
    # is removed if present. A missing `column` already raised above.
    features = df.drop(columns=[column,"Target"],errors="ignore")
    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.3,random_state = 0)
    model = SVC(kernel=kernel,gamma=gamma,C=C)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    result = {}
    result.update(error_metrics(y_test, y_pred))
    result.update(create_confusion_matrix(y_test,y_pred))
    return result

In [None]:
def svm_classification(df,column = "Next Day Close Price GR",rate_of_growth=None,param_grid=None):
    """
    Grid-searches SVC hyper-parameters for several growth thresholds.

    For every threshold a binary target is built (1 where df[column] is at or
    above the threshold), the best parameters are selected with GridSearchCV
    scored on precision of the positive class, and a model with those
    parameters is evaluated through create_svm_classification.
    The caller's dataframe is not modified.

    Parameters
    ----------
    df : dataframe
        feature columns plus the predictor column.

    column : string
        name of the predictor column.

    rate_of_growth : list of numeric, optional
        thresholds to evaluate; defaults to 0.001 ... 0.005.

    param_grid : dict, optional
        grid handed to GridSearchCV; defaults to the C / gamma / kernel grid
        defined below.

    Returns
    -------
    solution : list of dict
        one entry per threshold holding the evaluation metrics, the best
        parameters and the "rate_of_growth" used.
    """
    if rate_of_growth is None:
        rate_of_growth = [0.001,0.002,0.003,0.004,0.005]
    if param_grid is None:
        param_grid = {'C': [0.1, 1, 10, 100, 1000],
                      'gamma':[0.1,0.01,0.001,0.0001,0.00001],
                      'kernel': ['rbf','sigmoid']}

    # The scorer and the feature matrix do not depend on the threshold.
    custom_scorer = make_scorer(precision_score, greater_is_better=True, pos_label = 1)
    # Work on a private copy so nothing done here, or in the evaluation
    # helper, leaks a "Target" column into the caller's dataframe.
    frame = df.copy()
    features = frame.drop(columns=[column,"Target"],errors="ignore")

    solution = list()
    for t in rate_of_growth:
        target = (frame[column] >= t).astype(int).rename("Target")
        x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.3,random_state = 0)

        grid = GridSearchCV(SVC(),param_grid, refit = True, verbose = 0,scoring = custom_scorer)
        grid.fit(x_train, y_train)

        best = grid.best_params_
        result = create_svm_classification(frame,column,best['C'],best['gamma'],best['kernel'],t)
        result.update(best)
        result.update({"rate_of_growth":t})
        solution.append(result)
    return solution

In [None]:
# Collect the security codes from files named "gr<code>.csv".
# Entries that do not follow that pattern (other file types, sub-directories)
# are skipped: slicing them with [2:-4] would yield a bogus code that cannot
# be turned back into an existing file name when the data is loaded.
security_codes = sorted(
    filename[2:-4]
    for filename in os.listdir("../input/newdata/grstocks")
    if filename.startswith("gr") and filename.endswith(".csv")
)

In [None]:
%%time
# Run the full pipeline for every security and write one result CSV per
# security ("svm_<code>.csv") to the working directory.
for name in security_codes:
    try:
        print(name)
        df = pd.read_csv(os.path.join("../input/newdata/grstocks/","gr"+str(name)+".csv"))
        df = pre_process_data(df,60)
        column = "Next Day Close Price GR"
        df,column = dependent_column(df,column)
        # Reuse the predictor name returned above instead of repeating the literal.
        result = svm_classification(df,column = column)
        result_df = pd.DataFrame(result)
        result_df.to_csv("svm_"+str(name)+".csv",index=False)
    except Exception:
        # Best effort: report the failure for this security and carry on
        # with the remaining ones.
        traceback.print_exc()