In [1]:
import datetime
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
%matplotlib inline

In [3]:
sns.set(color_codes=True)

In [4]:
pd.set_option('display.max_columns', 100)

In [5]:
# Data Loading
PICKLE_TRAIN_DIR = os.path.join("..","processed_data","train_data.pkl")
PICKLE_HIST_DIR = os.path.join("..","processed_data","history_data.pkl")
PICKLE_FEATURES_DIR = os.path.join("..","processed_data","features.pkl")

In [6]:
history_data = pd.read_pickle(PICKLE_HIST_DIR)
train = pd.read_pickle(PICKLE_TRAIN_DIR)
features = pd.read_pickle(PICKLE_FEATURES_DIR)

In [7]:
history_data.head()

Unnamed: 0,id,price_date,price_p1_var,price_p2_var,price_p3_var,price_p1_fix,price_p2_fix,price_p3_fix
0,038af19179925da21a25619c5a24b745,2015-01-01,0.151367,0.0,0.0,44.266931,0.0,0.0
1,038af19179925da21a25619c5a24b745,2015-02-01,0.151367,0.0,0.0,44.266931,0.0,0.0
2,038af19179925da21a25619c5a24b745,2015-03-01,0.151367,0.0,0.0,44.266931,0.0,0.0
3,038af19179925da21a25619c5a24b745,2015-04-01,0.149626,0.0,0.0,44.266931,0.0,0.0
4,038af19179925da21a25619c5a24b745,2015-05-01,0.149626,0.0,0.0,44.266931,0.0,0.0


In [8]:
train.head()

Unnamed: 0,id,cons_12m,cons_gas_12m,cons_last_month,forecast_cons_12m,forecast_discount_energy,forecast_meter_rent_12m,forecast_price_energy_p1,forecast_price_energy_p2,forecast_price_pow_p1,has_gas,imp_cons,margin_gross_pow_ele,margin_net_pow_ele,nb_prod_act,net_margin,pow_max,churn,tenure,months_activ,months_to_end,months_modif_prod,months_renewal,channel_epu,channel_ewp,channel_fix,channel_foo,channel_lmk,channel_sdd,channel_usi,origin_ewx,origin_kam,origin_ldk,origin_lxi,origin_usa,activity_apd,activity_ckf,activity_clu,activity_cwo,activity_fmw,activity_kkk,activity_kwu,activity_sfi,activity_wxe
0,48ada52261e7cf58715202705a0451c9,5.490346,0.0,4.001128,4.423595,0.0,2.556652,0.095919,0.088347,58.995952,0,2.920541,-41.76,-41.76,1,198.346424,18.402912,0,3,37.0,10.0,37.0,1.0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,24011ae4ebbe3035111d65fa7c15bc57,4.327104,4.739944,0.0,3.085953,0.0,0.444045,0.114481,0.098142,40.606701,1,0.0,25.44,25.44,2,678.99,43.648,1,3,30.0,5.0,2.0,6.0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
2,d29c2c54acc38ff3c0614d0a653813dd,3.668479,0.0,0.0,2.28092,0.0,1.237292,0.145711,0.0,44.311378,0,0.0,16.38,16.38,1,18.89,13.8,0,7,76.0,7.0,76.0,4.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,764c75f661154dac3a6c254cd082ea7d,2.736397,0.0,0.0,1.689841,0.0,1.599009,0.165794,0.087899,44.311378,0,0.0,28.6,28.6,1,6.6,13.856,0,6,68.0,3.0,68.0,8.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
4,bba03439a292a1e166f80264c16191cb,3.200029,0.0,0.0,2.382089,0.0,1.318689,0.146694,0.0,44.311378,0,0.0,30.22,30.22,1,25.46,13.2,0,6,69.0,2.0,69.0,9.0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:
features.head()

Unnamed: 0,id,mean_year_price_p1_var,mean_year_price_p2_var,mean_year_price_p3_var,mean_year_price_p1_fix,mean_year_price_p2_fix,mean_year_price_p3_fix,mean_year_price_p1,mean_year_price_p2,mean_year_price_p3
0,0002203ffbb812588b632b9e628cc38d,0.124338,0.103794,0.07316,40.701732,24.421038,16.280694,40.826071,24.524832,16.353854
1,0004351ebdd665e6ee664792efc4fd13,0.146426,0.0,0.0,44.38545,0.0,0.0,44.531877,0.0,0.0
2,0010bcc39e42b3c2131ed2ce55246e3c,0.181558,0.0,0.0,45.31971,0.0,0.0,45.501268,0.0,0.0
3,0010ee3855fdea87602a5b7aba8e42de,0.118757,0.098292,0.069032,40.647427,24.388455,16.258971,40.766185,24.486748,16.328003
4,00114d74e963e47177db89bc70108537,0.147926,0.0,0.0,44.26693,0.0,0.0,44.414856,0.0,0.0


In [10]:
train = pd.merge(train, features, how="outer", on=["id"])

In [11]:
train.head()

Unnamed: 0,id,cons_12m,cons_gas_12m,cons_last_month,forecast_cons_12m,forecast_discount_energy,forecast_meter_rent_12m,forecast_price_energy_p1,forecast_price_energy_p2,forecast_price_pow_p1,has_gas,imp_cons,margin_gross_pow_ele,margin_net_pow_ele,nb_prod_act,net_margin,pow_max,churn,tenure,months_activ,months_to_end,months_modif_prod,months_renewal,channel_epu,channel_ewp,channel_fix,channel_foo,channel_lmk,channel_sdd,channel_usi,origin_ewx,origin_kam,origin_ldk,origin_lxi,origin_usa,activity_apd,activity_ckf,activity_clu,activity_cwo,activity_fmw,activity_kkk,activity_kwu,activity_sfi,activity_wxe,mean_year_price_p1_var,mean_year_price_p2_var,mean_year_price_p3_var,mean_year_price_p1_fix,mean_year_price_p2_fix,mean_year_price_p3_fix,mean_year_price_p1,mean_year_price_p2,mean_year_price_p3
0,48ada52261e7cf58715202705a0451c9,5.490346,0.0,4.001128,4.423595,0.0,2.556652,0.095919,0.088347,58.995952,0,2.920541,-41.76,-41.76,1,198.346424,18.402912,0,3,37.0,10.0,37.0,1.0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.103449,0.092115,0.067241,58.956502,36.356887,8.337051,59.05995,36.449002,8.404292
1,24011ae4ebbe3035111d65fa7c15bc57,4.327104,4.739944,0.0,3.085953,0.0,0.444045,0.114481,0.098142,40.606701,1,0.0,25.44,25.44,2,678.99,43.648,1,3,30.0,5.0,2.0,6.0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0.124787,0.100749,0.06653,40.942265,22.35201,14.90134,41.067053,22.45276,14.967871
2,d29c2c54acc38ff3c0614d0a653813dd,3.668479,0.0,0.0,2.28092,0.0,1.237292,0.145711,0.0,44.311378,0,0.0,16.38,16.38,1,18.89,13.8,0,7,76.0,7.0,76.0,4.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.149609,0.007124,0.0,44.311375,0.0,0.0,44.460984,0.007124,0.0
3,764c75f661154dac3a6c254cd082ea7d,2.736397,0.0,0.0,1.689841,0.0,1.599009,0.165794,0.087899,44.311378,0,0.0,28.6,28.6,1,6.6,13.856,0,6,68.0,3.0,68.0,8.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.170512,0.088421,0.0,44.38545,0.0,0.0,44.555962,0.088421,0.0
4,bba03439a292a1e166f80264c16191cb,3.200029,0.0,0.0,2.382089,0.0,1.318689,0.146694,0.0,44.311378,0,0.0,30.22,30.22,1,25.46,13.2,0,6,69.0,2.0,69.0,9.0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.15121,0.0,0.0,44.400265,0.0,0.0,44.551475,0.0,0.0


In [12]:
#X = train["id","cons_12m","cons_gas_12m","cons_last_month","forecast_cons_12m","forecast_discount_energy",
#                     "forecast_meter_rent_12m","forecast_price_energy_p1","forecast_price_energy_p2","forecast_price_pow_p1","has_gas",
#                     "imp_cons","margin_gross_pow_ele","margin_net_pow_ele","nb_prod_act","net_margin","pow_max","tenure","months_activ",
#                     "months_to_end","months_modif_prod","months_renewal","channel_epu","channel_ewp","channel_fix","channel_foo",
#                     "channel_lmk","channel_sdd","channel_usi","origin_ewx","origin_kam","origin_ldk","origin_lxi","origin_usa","activity_apd",
#                     "activity_ckf","activity_clu","activity_cwo","activity_fmw","activity_kkk","activity_kwu","activity_sfi","activity_wxe",
#                     "mean_year_price_p1_var","mean_year_price_p2_var","mean_year_price_p3_var","mean_year_price_p1_fix","mean_year_price_p2_fix",
#                     "mean_year_price_p3_fix","mean_year_price_p1","mean_year_price_p2","mean_year_price_p3"].values
y = train["churn"].values
X1 = train.drop(["churn"], axis = 1)
cust_id = train["id"].values
X2 = X1.drop(["id"], axis = 1)
X = X2.iloc[:,:].values

In [13]:
X1.head()

Unnamed: 0,id,cons_12m,cons_gas_12m,cons_last_month,forecast_cons_12m,forecast_discount_energy,forecast_meter_rent_12m,forecast_price_energy_p1,forecast_price_energy_p2,forecast_price_pow_p1,has_gas,imp_cons,margin_gross_pow_ele,margin_net_pow_ele,nb_prod_act,net_margin,pow_max,tenure,months_activ,months_to_end,months_modif_prod,months_renewal,channel_epu,channel_ewp,channel_fix,channel_foo,channel_lmk,channel_sdd,channel_usi,origin_ewx,origin_kam,origin_ldk,origin_lxi,origin_usa,activity_apd,activity_ckf,activity_clu,activity_cwo,activity_fmw,activity_kkk,activity_kwu,activity_sfi,activity_wxe,mean_year_price_p1_var,mean_year_price_p2_var,mean_year_price_p3_var,mean_year_price_p1_fix,mean_year_price_p2_fix,mean_year_price_p3_fix,mean_year_price_p1,mean_year_price_p2,mean_year_price_p3
0,48ada52261e7cf58715202705a0451c9,5.490346,0.0,4.001128,4.423595,0.0,2.556652,0.095919,0.088347,58.995952,0,2.920541,-41.76,-41.76,1,198.346424,18.402912,3,37.0,10.0,37.0,1.0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.103449,0.092115,0.067241,58.956502,36.356887,8.337051,59.05995,36.449002,8.404292
1,24011ae4ebbe3035111d65fa7c15bc57,4.327104,4.739944,0.0,3.085953,0.0,0.444045,0.114481,0.098142,40.606701,1,0.0,25.44,25.44,2,678.99,43.648,3,30.0,5.0,2.0,6.0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0.124787,0.100749,0.06653,40.942265,22.35201,14.90134,41.067053,22.45276,14.967871
2,d29c2c54acc38ff3c0614d0a653813dd,3.668479,0.0,0.0,2.28092,0.0,1.237292,0.145711,0.0,44.311378,0,0.0,16.38,16.38,1,18.89,13.8,7,76.0,7.0,76.0,4.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.149609,0.007124,0.0,44.311375,0.0,0.0,44.460984,0.007124,0.0
3,764c75f661154dac3a6c254cd082ea7d,2.736397,0.0,0.0,1.689841,0.0,1.599009,0.165794,0.087899,44.311378,0,0.0,28.6,28.6,1,6.6,13.856,6,68.0,3.0,68.0,8.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.170512,0.088421,0.0,44.38545,0.0,0.0,44.555962,0.088421,0.0
4,bba03439a292a1e166f80264c16191cb,3.200029,0.0,0.0,2.382089,0.0,1.318689,0.146694,0.0,44.311378,0,0.0,30.22,30.22,1,25.46,13.2,6,69.0,2.0,69.0,9.0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.15121,0.0,0.0,44.400265,0.0,0.0,44.551475,0.0,0.0


In [14]:
X2.head()

Unnamed: 0,cons_12m,cons_gas_12m,cons_last_month,forecast_cons_12m,forecast_discount_energy,forecast_meter_rent_12m,forecast_price_energy_p1,forecast_price_energy_p2,forecast_price_pow_p1,has_gas,imp_cons,margin_gross_pow_ele,margin_net_pow_ele,nb_prod_act,net_margin,pow_max,tenure,months_activ,months_to_end,months_modif_prod,months_renewal,channel_epu,channel_ewp,channel_fix,channel_foo,channel_lmk,channel_sdd,channel_usi,origin_ewx,origin_kam,origin_ldk,origin_lxi,origin_usa,activity_apd,activity_ckf,activity_clu,activity_cwo,activity_fmw,activity_kkk,activity_kwu,activity_sfi,activity_wxe,mean_year_price_p1_var,mean_year_price_p2_var,mean_year_price_p3_var,mean_year_price_p1_fix,mean_year_price_p2_fix,mean_year_price_p3_fix,mean_year_price_p1,mean_year_price_p2,mean_year_price_p3
0,5.490346,0.0,4.001128,4.423595,0.0,2.556652,0.095919,0.088347,58.995952,0,2.920541,-41.76,-41.76,1,198.346424,18.402912,3,37.0,10.0,37.0,1.0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.103449,0.092115,0.067241,58.956502,36.356887,8.337051,59.05995,36.449002,8.404292
1,4.327104,4.739944,0.0,3.085953,0.0,0.444045,0.114481,0.098142,40.606701,1,0.0,25.44,25.44,2,678.99,43.648,3,30.0,5.0,2.0,6.0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0.124787,0.100749,0.06653,40.942265,22.35201,14.90134,41.067053,22.45276,14.967871
2,3.668479,0.0,0.0,2.28092,0.0,1.237292,0.145711,0.0,44.311378,0,0.0,16.38,16.38,1,18.89,13.8,7,76.0,7.0,76.0,4.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.149609,0.007124,0.0,44.311375,0.0,0.0,44.460984,0.007124,0.0
3,2.736397,0.0,0.0,1.689841,0.0,1.599009,0.165794,0.087899,44.311378,0,0.0,28.6,28.6,1,6.6,13.856,6,68.0,3.0,68.0,8.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.170512,0.088421,0.0,44.38545,0.0,0.0,44.555962,0.088421,0.0
4,3.200029,0.0,0.0,2.382089,0.0,1.318689,0.146694,0.0,44.311378,0,0.0,30.22,30.22,1,25.46,13.2,6,69.0,2.0,69.0,9.0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.15121,0.0,0.0,44.400265,0.0,0.0,44.551475,0.0,0.0


In [15]:
train.head()

Unnamed: 0,id,cons_12m,cons_gas_12m,cons_last_month,forecast_cons_12m,forecast_discount_energy,forecast_meter_rent_12m,forecast_price_energy_p1,forecast_price_energy_p2,forecast_price_pow_p1,has_gas,imp_cons,margin_gross_pow_ele,margin_net_pow_ele,nb_prod_act,net_margin,pow_max,churn,tenure,months_activ,months_to_end,months_modif_prod,months_renewal,channel_epu,channel_ewp,channel_fix,channel_foo,channel_lmk,channel_sdd,channel_usi,origin_ewx,origin_kam,origin_ldk,origin_lxi,origin_usa,activity_apd,activity_ckf,activity_clu,activity_cwo,activity_fmw,activity_kkk,activity_kwu,activity_sfi,activity_wxe,mean_year_price_p1_var,mean_year_price_p2_var,mean_year_price_p3_var,mean_year_price_p1_fix,mean_year_price_p2_fix,mean_year_price_p3_fix,mean_year_price_p1,mean_year_price_p2,mean_year_price_p3
0,48ada52261e7cf58715202705a0451c9,5.490346,0.0,4.001128,4.423595,0.0,2.556652,0.095919,0.088347,58.995952,0,2.920541,-41.76,-41.76,1,198.346424,18.402912,0,3,37.0,10.0,37.0,1.0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.103449,0.092115,0.067241,58.956502,36.356887,8.337051,59.05995,36.449002,8.404292
1,24011ae4ebbe3035111d65fa7c15bc57,4.327104,4.739944,0.0,3.085953,0.0,0.444045,0.114481,0.098142,40.606701,1,0.0,25.44,25.44,2,678.99,43.648,1,3,30.0,5.0,2.0,6.0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0.124787,0.100749,0.06653,40.942265,22.35201,14.90134,41.067053,22.45276,14.967871
2,d29c2c54acc38ff3c0614d0a653813dd,3.668479,0.0,0.0,2.28092,0.0,1.237292,0.145711,0.0,44.311378,0,0.0,16.38,16.38,1,18.89,13.8,0,7,76.0,7.0,76.0,4.0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.149609,0.007124,0.0,44.311375,0.0,0.0,44.460984,0.007124,0.0
3,764c75f661154dac3a6c254cd082ea7d,2.736397,0.0,0.0,1.689841,0.0,1.599009,0.165794,0.087899,44.311378,0,0.0,28.6,28.6,1,6.6,13.856,0,6,68.0,3.0,68.0,8.0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.170512,0.088421,0.0,44.38545,0.0,0.0,44.555962,0.088421,0.0
4,bba03439a292a1e166f80264c16191cb,3.200029,0.0,0.0,2.382089,0.0,1.318689,0.146694,0.0,44.311378,0,0.0,30.22,30.22,1,25.46,13.2,0,6,69.0,2.0,69.0,9.0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.15121,0.0,0.0,44.400265,0.0,0.0,44.551475,0.0,0.0


In [16]:
X

array([[5.49034622e+00, 0.00000000e+00, 4.00112770e+00, ...,
        5.90599502e+01, 3.64490021e+01, 8.40429201e+00],
       [4.32710444e+00, 4.73994399e+00, 0.00000000e+00, ...,
        4.10670526e+01, 2.24527596e+01, 1.49678708e+01],
       [3.66847910e+00, 0.00000000e+00, 0.00000000e+00, ...,
        4.44609842e+01, 7.12358333e-03, 0.00000000e+00],
       ...,
       [3.26599637e+00, 0.00000000e+00, 2.25527251e+00, ...,
        4.08260708e+01, 2.45248322e+01, 1.63538543e+01],
       [2.12057393e+00, 0.00000000e+00, 0.00000000e+00, ...,
        4.44609843e+01, 7.12358333e-03, 0.00000000e+00],
       [3.94106399e+00, 0.00000000e+00, 0.00000000e+00, ...,
        4.44355921e+01, 8.73442500e-02, 0.00000000e+00]])

In [17]:
y

array([0, 1, 0, ..., 1, 0, 0], dtype=int64)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=11111)

In [19]:
normalizer = StandardScaler()
X_train_norm = normalizer.fit_transform(X_train)
X_test_norm = normalizer.transform(X_test)

In [20]:
def H(z):
    g = 1/(1 + np.exp(-z))
    return g

In [21]:
def Z(theta, x1):
    z = 0
    count = 0
    for element in theta:
        z += element*x1[count]
        count += 1
    return z

In [22]:
def cost(theta, x, y):
    cost = 0
    m = len(x)
    for i in range(m):
        x1 = x[i][:]
        z = Z(theta, x1)
        h = H(z)
        cost += -(y[i]*np.log(h) + ((1-y[i])*np.log(1-h)))
    cost = cost/m
    return cost

In [23]:
def gradDescent(theta, x, y, alpha, maxIter):
    for k in range(maxIter):
        theta1 = theta
        for i in range(len(theta)):
            derCost = 0
            j = 0
            for j in range(len(x)):
                x1 = x[j][:]
                z = Z(theta, x1)
                h = H(z)
                derCost += (h - y[j])*x[j][i]
            theta1[i] = theta[i] - alpha*derCost/len(x)      
        theta = theta1
        print(k)
    return theta

In [24]:
def accuracy(theta_Final, x, y):
    q = 0
    p = []
    for i in range(len(x)):
        x1 = x[i][:]
        z = Z(theta_Final, x1)
        h = H(z)
        if h>=0.5:
            p.insert(i, 1)
        if h<0.5:
            p.insert(i, 0)
        if y[i]==p[i]:
            q += 1
    accuracy = q*100/len(y)
    return accuracy

In [25]:
len(X[0])

51

In [26]:
init_Theta = np.random.rand(51)

In [27]:
init_Theta

array([0.67386996, 0.23186337, 0.92001406, 0.30498526, 0.52399055,
       0.12522231, 0.4479645 , 0.99350618, 0.45224177, 0.29320221,
       0.59746368, 0.94942425, 0.95246635, 0.61949269, 0.57589509,
       0.73204561, 0.69671099, 0.03472371, 0.92828298, 0.20768896,
       0.35872472, 0.52790611, 0.71567778, 0.65311467, 0.25592577,
       0.02023214, 0.19006124, 0.58280008, 0.5934219 , 0.86818564,
       0.23718414, 0.40765601, 0.3728551 , 0.6786218 , 0.2491966 ,
       0.09047461, 0.74847125, 0.36762148, 0.53289159, 0.90934353,
       0.67693836, 0.44232166, 0.76863623, 0.12789014, 0.29591151,
       0.78608857, 0.56157843, 0.10415269, 0.34708718, 0.33019347,
       0.4750483 ])

In [28]:
alpha = 0.03
maxIter = 1500
#theta_Final = gradDescent(init_Theta, X_train, y_train, alpha, maxIter)

In [29]:
#theta_Final

In [30]:
#accuracy(theta_Final, X_train, y_train)

In [31]:
#accuracy(theta_Final, X_test, y_test)

In [32]:
#accuracy(theta_Final, X, y)

In [33]:
#PICKLE_THETA_DIR = os.path.join("..", "processed_data", "theta_Final.pkl")

In [34]:
#pd.to_pickle(theta_Final, PICKLE_THETA_DIR)

In [35]:
PICKLE_THETA_DIR = os.path.join("..","processed_data","theta_Final.pkl")
theta_Final = pd.read_pickle(PICKLE_THETA_DIR)

In [36]:
theta_Final

array([  1.45363467,  -0.26977699,   0.0671264 ,   1.63509119,
         0.26988198,   1.23984178,   0.86010194,   0.38128296,
        15.25164087,   0.39327479,   0.74096994,   0.14463811,
         3.20254983,   0.77300188,   0.31866276, -10.21153   ,
        -0.39941358,  -4.24395382,  -0.66678216,   0.50271257,
        -3.90918866,   0.98519878,  -0.02944709,   0.25508629,
         0.02470777,  -0.11884956,   0.35872805,   0.65244579,
         0.88073241,   0.55475428,   0.77001911,   0.16876851,
         0.03973932,  -0.19313791,   0.37440583,   0.35527686,
         0.44467237,   0.53826421,   0.47527731,   0.42185964,
         0.03442815,   0.107702  ,   0.38461469,   0.19757691,
         0.50018722, -15.151358  ,  -1.37780788,  -0.03047638,
         3.88276988,   2.44144667,   2.29937632])

In [37]:
theta_Final_val = gradDescent(theta_Final, X_test, y_test, alpha, 100)

  g = 1/(1 + np.exp(-z))


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [38]:
accuracy(theta_Final_val, X, y)

  g = 1/(1 + np.exp(-z))


85.6610337972167

In [39]:
accuracy(theta_Final_val, X_train, y_train)

  g = 1/(1 + np.exp(-z))


85.15573227302849

In [40]:
theta_Final2 = gradDescent(theta_Final_val, X_train, y_train, alpha, 50)

  g = 1/(1 + np.exp(-z))


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [41]:
accuracy(theta_Final2, X_test, y_test)

  g = 1/(1 + np.exp(-z))


90.25844930417495

In [42]:
accuracy(theta_Final2, X, y)

  g = 1/(1 + np.exp(-z))


89.17743538767395

In [43]:
theta_Final_val2 = gradDescent(theta_Final2, X_test, y_test, alpha, 50)

  g = 1/(1 + np.exp(-z))


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [44]:
accuracy(theta_Final_val2, X, y)

  g = 1/(1 + np.exp(-z))


89.22092445328032

In [45]:
accuracy(theta_Final_val2, X_test, y_test)

  g = 1/(1 + np.exp(-z))


90.35785288270378

In [46]:
theta_Final3 = gradDescent(theta_Final_val2, X_train, y_train, alpha, 50)

  g = 1/(1 + np.exp(-z))


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


In [47]:
accuracy(theta_Final3, X, y)

  g = 1/(1 + np.exp(-z))


77.26143141153082

In [48]:
accuracy(theta_Final3, X_test, y_test)

  g = 1/(1 + np.exp(-z))


79.00099403578528

In [54]:
def f_score(theta, x, y):
    q = 0
    p = []
    tp = 0
    fp = 0
    fn = 0
    for i in range(len(x)):
        x1 = x[i][:]
        z = Z(theta_Final, x1)
        h = H(z)
        if h>=0.5:
            p.insert(i, 1)
        if h<0.5:
            p.insert(i, 0)
        if y[i]==p[i]:
            q += 1
        if (y[i]==1)and(p[i]==1):
            tp +=1
        if (y[i]==0)and(p[i]==1):
            fp +=1
        if (y[i]==1)and(p[i]==0):
            fn +=1
    precision = tp/(tp + fn)
    recall = tp/(tp + fp)
    f_s = 2*precision*recall/(precision + recall)
    return f_s, precision, recall

In [55]:
f_score(theta_Final_val2, X, y)

  g = 1/(1 + np.exp(-z))


(0.20676202860858256, 0.29905956112852666, 0.1579993375289831)