In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

from sklearn.feature_extraction.text import CountVectorizer # word counts
from sklearn.feature_extraction.text import TfidfTransformer # term frequency tfidf 

from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel # feature selection - ranked features; gives top features


#NLTK-------------------------------
import nltk # natural language toolkit
nltk.download('punkt')
from nltk.tokenize import word_tokenize # breaks sentences into single words
from nltk.stem.snowball import SnowballStemmer 
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem.lancaster import LancasterStemmer

# Import libraries for feature 
from sklearn.feature_selection import SelectKBest #rank and find the best combination of features
from sklearn.feature_selection import chi2


from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,classification_report
from sklearn import metrics
from sklearn.model_selection import cross_val_score

import warnings
warnings.filterwarnings("ignore")


[nltk_data] Downloading package punkt to C:\Users\Shehjar
[nltk_data]     Raina\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
textData = pd.read_csv("C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\Comments.csv")
CustInfoData = pd.read_csv("C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\Customers.csv") 
print(textData.shape)
print(CustInfoData.shape)

(2070, 2)
(2070, 17)


In [4]:
textData.head()

Unnamed: 0,ID,Comments
0,1309,Does not like the way the phone works. It is t...
1,3556,Wanted to know the nearest store location. Wan...
2,2230,Wants to know how to do text messaging. Referr...
3,2312,Asked how to disable call waiting. referred hi...
4,3327,Needs help learning how to use the phone. I su...


In [5]:
CustInfoData.head()

Unnamed: 0,ID,Sex,Status,Children,Est_Income,Car_Owner,Usage,Age,RatePlan,LongDistance,International,Local,Dropped,Paymethod,LocalBilltype,LongDistanceBilltype,TARGET
0,1,F,S,1,38000.0,N,229.64,24.393333,3,23.56,0.0,206.08,0,CC,Budget,Intnl_discount,Cancelled
1,6,M,M,2,29616.0,N,75.29,49.426667,2,29.78,0.0,45.5,0,CH,FreeLocal,Standard,Current
2,8,M,M,0,19732.8,N,47.25,50.673333,3,24.81,0.0,22.44,0,CC,FreeLocal,Standard,Current
3,11,M,S,2,96.33,N,59.01,56.473333,1,26.13,0.0,32.88,1,CC,Budget,Standard,Current
4,14,F,M,2,52004.8,N,28.14,25.14,1,5.03,0.0,23.11,0,CH,Budget,Intnl_discount,Cancelled


In [6]:
#Extract target column from Customer Info file
y_train = CustInfoData["TARGET"]
X_train = CustInfoData.drop(columns=["TARGET"]) #extracting training data without the target column
                     
print(X_train.shape)
print(textData.shape)
print(textData.head())
print(y_train)

(2070, 16)
(2070, 2)
     ID                                           Comments
0  1309  Does not like the way the phone works. It is t...
1  3556  Wanted to know the nearest store location. Wan...
2  2230  Wants to know how to do text messaging. Referr...
3  2312  Asked how to disable call waiting. referred hi...
4  3327  Needs help learning how to use the phone. I su...
0       Cancelled
1         Current
2         Current
3         Current
4       Cancelled
          ...    
2065    Cancelled
2066    Cancelled
2067    Cancelled
2068    Cancelled
2069    Cancelled
Name: TARGET, Length: 2070, dtype: object


In [7]:
#Tokenize - Split the sentences to lists of words
textData['CommentsTokenized'] = textData['Comments'].apply(word_tokenize)

#export_csv = textData.to_csv('C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\1.TextDataTokenized1.csv')

In [75]:
#Stemming using Snowball Stemmer
# Snowball stemmer does not look at the root word, other stemmers do 
# algorithm stemming; not based on root word
stemmer1 = SnowballStemmer("english")

#Now do stemming - create a new dataframe to store stemmed version
newTextData1=pd.DataFrame()
newTextData1=textData.drop(columns=["CommentsTokenized","Comments"])
newTextData1['CommentsTokenizedStemmed'] = textData['CommentsTokenized'].apply(lambda x: [stemmer1.stem(y) for y in x])

#export_csv = newTextData1.to_csv('C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\2.newTextDataTS1.csv')


In [73]:
#Stemming using Porter Stemmer
stemmer2 = PorterStemmer()

#Now do stemming - create a new dataframe to store stemmed version
newTextData2=pd.DataFrame()
newTextData2=textData.drop(columns=["CommentsTokenized","Comments"])
newTextData2['CommentsTokenizedStemmed'] = textData['CommentsTokenized'].apply(lambda x: [stemmer2.stem(y) for y in x])

#export_csv = newTextData2.to_csv('C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\3.newTextDataTS2.csv')

In [10]:
#Stemming using Lancaster Stemmer
stemmer3 = LancasterStemmer()

#Now do stemming - create a new dataframe to store stemmed version
newTextData3=pd.DataFrame()
newTextData3=textData.drop(columns=["CommentsTokenized","Comments"])
newTextData3['CommentsTokenizedStemmed'] = textData['CommentsTokenized'].apply(lambda x: [stemmer3.stem(y) for y in x])

#export_csv = newTextData3.to_csv('C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\4.newTextDataTS3.csv')

In [11]:
#Join Stemmed Strings
newTextData1['CommentsTokenizedStemmed'] = newTextData1['CommentsTokenizedStemmed'].apply(lambda x: " ".join(x))

#export_csv = newTextData1.to_csv('C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\4.newTextData-Joined.csv')

In [12]:
#construct the term-document matrix
#also drop stop words and do counting using count vectorizer
count_vect = CountVectorizer(stop_words='english',lowercase=False)
TD_counts = count_vect.fit_transform(newTextData1.CommentsTokenizedStemmed)
print(TD_counts.shape)
print(TD_counts.dtype)
print(count_vect.get_feature_names())
#print(TD_counts)
DF_TD_Counts=pd.DataFrame(TD_counts.toarray(),columns=count_vect.get_feature_names())
print(DF_TD_Counts)
#export_csv = DF_TD_Counts.to_csv('C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\5.TD_counts-TokenizedStemmed.csv')

(2070, 354)
int64
['3399', '3g', 'abysm', 'access', 'accessori', 'adapt', 'add', 'addit', 'additon', 'address', 'adit', 'adress', 'advertis', 'afraid', 'alway', 'angel', 'angri', 'ani', 'anoth', 'anyth', 'anytim', 'area', 'asap', 'ask', 'bad', 'basic', 'bateri', 'batteri', 'becaus', 'believ', 'better', 'bigger', 'book', 'bought', 'brain', 'bring', 'built', 'busi', 'button', 'buy', 'cancel', 'cancer', 'car', 'care', 'carrier', 'caus', 'cc', 'cell', 'certain', 'chang', 'charg', 'charger', 'check', 'chip', 'citi', 'claim', 'cleariti', 'cold', 'comapr', 'compani', 'compar', 'competit', 'complain', 'complaint', 'concept', 'connect', 'consisit', 'consist', 'constan', 'contact', 'continu', 'contract', 'correct', 'cost', 'coupl', 'cover', 'coverag', 'creat', 'credit', 'cstmer', 'cstmr', 'current', 'cust', 'custom', 'customr', 'date', 'day', 'dead', 'decent', 'defect', 'deo', 'did', 'die', 'differ', 'difficult', 'digiti', 'direct', 'disabl', 'doe', 'don', 'dont', 'drop', 'dure', 'easier', 'effe

In [13]:
#Compute TF-IDF Matrix
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(TD_counts)
print(X_train_tfidf.shape)
# print(tfidf_transformer.get_feature_names_out())
DF_TF_IDF=pd.DataFrame(X_train_tfidf.toarray(),columns=count_vect.get_feature_names())
print(DF_TF_IDF)
#export_csv= DF_TF_IDF.to_csv('C:\\Users\\Shehjar Raina\\Desktop\\CIS 508 Data Mining\\IA4\\6.TFIDF_counts-TokenizedStemmed.csv')

(2070, 354)
      3399   3g  abysm  access  accessori  adapt  add  addit  additon  \
0      0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
1      0.0  0.0    0.0     0.0    0.27568    0.0  0.0    0.0      0.0   
2      0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
3      0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
4      0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
...    ...  ...    ...     ...        ...    ...  ...    ...      ...   
2065   0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
2066   0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
2067   0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
2068   0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   
2069   0.0  0.0    0.0     0.0    0.00000    0.0  0.0    0.0      0.0   

       address  ...  wish  wll  wold      work  wors  worst  wrong  xvyx  \
0     0.000000  ...   0.0  0.0   0.

In [51]:
# combine the TF-IDF matrix with Customer data - should be done on ID
# TF-IDF DataFrame to be used
print(CustInfoData.shape)
X_train = CustInfoData.drop(columns=["TARGET"]) 
print(X_train.shape)
concat1=pd.concat([textData['ID'],DF_TF_IDF],axis=1)
concat1.head()

combined=pd.merge(X_train, concat1, on='ID')

print(combined.shape)
print(combined)
#combined.to_csv('7.CombinedData1.csv', index = False)

(2070, 17)
(2070, 16)
(2070, 370)
        ID Sex Status  Children  Est_Income Car_Owner   Usage        Age  \
0        1   F      S         1    38000.00         N  229.64  24.393333   
1        6   M      M         2    29616.00         N   75.29  49.426667   
2        8   M      M         0    19732.80         N   47.25  50.673333   
3       11   M      S         2       96.33         N   59.01  56.473333   
4       14   F      M         2    52004.80         N   28.14  25.140000   
...    ...  ..    ...       ...         ...       ...     ...        ...   
2065  3821   F      S         0    78851.30         N   29.04  48.373333   
2066  3822   F      S         1    17540.70         Y   36.20  62.786667   
2067  3823   F      M         0    83891.90         Y   74.40  61.020000   
2068  3824   F      M         2    28220.80         N   38.95  38.766667   
2069  3825   F      S         0    28589.10         N  100.28  15.600000   

      RatePlan  LongDistance  ...  wish  wll  wold   

In [52]:
#one-hot encoding on the categorical variables
categoricalFeatures = combined.select_dtypes(include=['object']).columns.tolist()
print(categoricalFeatures)

['Sex', 'Status', 'Car_Owner', 'Paymethod', 'LocalBilltype', 'LongDistanceBilltype']


In [53]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(handle_unknown='ignore',sparse=False)
Xcat = pd.DataFrame(ohe.fit_transform(combined[categoricalFeatures]),columns=ohe.get_feature_names(),index=combined.index)
combined = pd.concat([combined,Xcat],axis=1)
combined.drop(labels=categoricalFeatures,axis=1,inplace=True)
combined.sample(5)

Unnamed: 0,ID,Children,Est_Income,Usage,Age,RatePlan,LongDistance,International,Local,Dropped,...,x1_S,x2_N,x2_Y,x3_Auto,x3_CC,x3_CH,x4_Budget,x4_FreeLocal,x5_Intnl_discount,x5_Standard
850,1518,1,24141.5,125.87,17.006667,1,28.82,0.0,97.05,0,...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
1601,2922,1,80000.0,37.04,63.353333,1,6.08,0.0,30.95,0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0
21,53,1,57063.0,98.1,52.333333,4,16.79,0.0,81.3,0,...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
1814,3348,1,92647.5,32.13,56.046667,4,16.04,5.74,10.34,0,...,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
707,1301,1,56722.8,74.77,60.593333,4,17.37,6.21,51.17,0,...,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0


In [71]:
pd.set_option('display.max_columns',None)
combined

Unnamed: 0,ID,Children,Est_Income,Usage,Age,RatePlan,LongDistance,International,Local,Dropped,3399,3g,abysm,access,accessori,adapt,add,addit,additon,address,adit,adress,advertis,afraid,alway,angel,angri,ani,anoth,anyth,anytim,area,asap,ask,bad,basic,bateri,batteri,becaus,believ,better,bigger,book,bought,brain,bring,built,busi,button,buy,cancel,cancer,car,care,carrier,caus,cc,cell,certain,chang,charg,charger,check,chip,citi,claim,cleariti,cold,comapr,compani,compar,competit,complain,complaint,concept,connect,consisit,consist,constan,contact,continu,contract,correct,cost,coupl,cover,coverag,creat,credit,cstmer,cstmr,current,cust,custom,customr,date,day,dead,decent,defect,deo,did,die,differ,difficult,digiti,direct,disabl,doe,don,dont,drop,dure,easier,effect,encount,end,enemi,equip,everytim,everywher,evrey,exact,expect,expir,explain,facepl,fals,famili,featur,fed,figur,fine,fix,forev,forward,friend,function,furthermor,futur,gave,goat,good,great,gsm,handset,happi,hard,hate,hear,heard,help,higher,highway,hochi,hole,home,hope,horribl,hous,implement,improv,inadequ,includ,info,inform,ing,internet,intersect,issu,june,just,kid,kno,know,lame,later,lctn,learn,leroy,like,line,list,local,locat,locatn,long,los,lost,lot,love,major,make,manag,mani,manual,market,mean,messag,metropolitian,minut,misl,mistak,model,momma,mr,napeleon,near,nearest,need,network,new,news,notic,number,numer,offer,old,om,open,option,ori,ot,outbound,pass,pay,pda,peopl,perform,person,phone,piec,plan,pleas,point,polici,poor,possibl,probabl,problem,proper,provid,provis,purpos,rate,rater,realiz,realli,reason,receiv,recept,recption,reenter,refer,relat,rep,replac,respect,result,rid,right,ring,roam,roll,rubbish,rude,said,sale,say,screen,self,send,servic,shitti,shut,sign,signal,signific,simm,simpli,sinc,site,slow,sold,someon,sometim,soon,speak,speed,start,static,stole,store,stuff,stupid,substant,subtract,suck,suggest,supervisor,support,sure,surpris,suspect,suspend,switch,teach,technic,tell,terribl,test,text,think,thought,ticket,till,time,tire,today,toilet,told,tone,tower,transeff,transf,transfer,travel,tri,trust,turn,uncomfort,understand,unhappi,unlimit,unreli,unwil,upset,usag,use,useless,valu,veri,vm,wa,wait,want,wast,way,weak,web,websit,week,whi,wife,wish,wll,wold,work,wors,worst,wrong,xvyx,year,york,x0_F,x0_M,x1_D,x1_M,x1_S,x2_N,x2_Y,x3_Auto,x3_CC,x3_CH,x4_Budget,x4_FreeLocal,x5_Intnl_discount,x5_Standard
0,1,1,38000.00,229.64,24.393333,3,23.56,0.00,206.08,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.344388,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.336819,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.32972,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.472239,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.327212,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.472239,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.325802,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0
1,6,2,29616.00,75.29,49.426667,2,29.78,0.00,45.50,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320855,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.310995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.243227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271105,0.0,0.0,0.0,0.0,0.0,0.347885,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348322,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
2,8,0,19732.80,47.25,50.673333,3,24.81,0.00,22.44,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320855,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.310995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.243227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271105,0.0,0.0,0.0,0.0,0.0,0.347885,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348322,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
3,11,2,96.33,59.01,56.473333,1,26.13,0.00,32.88,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320855,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.310995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.243227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271105,0.0,0.0,0.0,0.0,0.0,0.347885,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348322,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
4,14,2,52004.80,28.14,25.140000,1,5.03,0.00,23.11,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.320855,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.310995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.243227,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.271105,0.0,0.0,0.0,0.0,0.0,0.347885,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.37653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.348322,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2065,3821,0,78851.30,29.04,48.373333,4,0.37,0.00,28.66,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.466708,0.000000,0.0,0.0,0.0,0.443664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.369504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.264422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.453766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.214868,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.356121,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
2066,3822,1,17540.70,36.20,62.786667,1,22.17,0.57,13.45,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.466708,0.000000,0.0,0.0,0.0,0.443664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.369504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.264422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.453766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.214868,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.356121,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
2067,3823,0,83891.90,74.40,61.020000,4,28.92,0.00,45.47,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.466708,0.000000,0.0,0.0,0.0,0.443664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.369504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.264422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.453766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.214868,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.356121,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0
2068,3824,2,28220.80,38.95,38.766667,4,26.49,0.00,12.46,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.466708,0.000000,0.0,0.0,0.0,0.443664,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.369504,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.264422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.453766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.214868,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.356121,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0


**FEATURE SELECTION - SelectKBest**

With k=25

In [54]:
#Feature Selection - Filter type - SelectKBest
selector1 = SelectKBest(k=25)
new_DF_TF_IDF1 = selector1.fit_transform(combined,y_train)

feature_names_out1 = selector1.get_support(indices=True)
#print(feature_names_out)

DF_TF_IDF_SelectedFeatures1= pd.DataFrame(new_DF_TF_IDF1)
print(selector1.get_feature_names_out())
#print(DF_TF_IDF_SelectedFeatures)

['Children' 'accessori' 'asap' 'better' 'buy' 'expect' 'featur' 'forward'
 'learn' 'locat' 'nearest' 'point' 'rate' 'rep' 'signific' 'store'
 'suggest' 'support' 'teach' 'technic' 'use' 'work' 'x0_M' 'x1_M' 'x1_S']


In [55]:
#Split dataset DF_TF_IDF_SelectedFeatures1
X_train, X_test, Y_train, Y_test = train_test_split(DF_TF_IDF_SelectedFeatures1, y_train, test_size = .2)
print(X_train.shape)
print(X_test.shape)

(1656, 25)
(414, 25)


In [56]:
#Construct Decision Tree using split data with SelectKBest features where k=25
clf = DecisionTreeClassifier()
dt = clf.fit(X_train, Y_train)
clf_predict=clf.predict(X_test)
clf_predict_proba=clf.predict_proba(X_test)
clf_predict_proba=clf_predict_proba[:,1]
print("accuracy Score (training) for Decision Tree:{0:6f}".format(clf.score(X_test,Y_test)))
print()
print("Confusion Matrix for Decision Tree")
print(confusion_matrix(Y_test,clf_predict))

accuracy Score (training) for Decision Tree:0.814010

Confusion Matrix for Decision Tree
[[133  35]
 [ 42 204]]


In [57]:
#Construct Random Forest Model using split data with SelectKBest features where k=25
rfc = RandomForestClassifier()
rfc1 = rfc.fit(X_train, Y_train)
rfc_predict=rfc.predict(X_test)
rfc_predict_proba=rfc.predict_proba(X_test)
rfc_predict_proba=rfc_predict_proba[:,1]
print("accuracy Score (training) for Random Forest:{0:6f}".format(rfc.score(X_test,Y_test)))
print()
print("Confusion Matrix for Random Forest")
print(confusion_matrix(Y_test,rfc_predict))

accuracy Score (training) for Random Forest:0.806763

Confusion Matrix for Random Forest
[[129  39]
 [ 41 205]]


With k=50

In [58]:
#Feature Selection - Filter type - SelectKBest
selector2 = SelectKBest(k=50)
new_DF_TF_IDF2 = selector2.fit_transform(combined,y_train)

feature_names_out2 = selector2.get_support(indices=True)
#print(feature_names_out)

DF_TF_IDF_SelectedFeatures2= pd.DataFrame(new_DF_TF_IDF2)
print(selector2.get_feature_names_out())
#print(DF_TF_IDF_SelectedFeatures)

['Children' 'Est_Income' 'Usage' 'LongDistance' 'International'
 'accessori' 'addit' 'asap' 'batteri' 'becaus' 'better' 'buy' 'cancel'
 'coupl' 'dead' 'disabl' 'expect' 'famili' 'featur' 'forward' 'help'
 'info' 'know' 'learn' 'locat' 'manag' 'nearest' 'need' 'phone' 'point'
 'rate' 'rep' 'said' 'signific' 'store' 'suggest' 'support' 'suspend'
 'teach' 'technic' 'told' 'transfer' 'use' 'want' 'work' 'x0_F' 'x0_M'
 'x1_M' 'x1_S' 'x3_Auto']


In [59]:
#Split dataset DF_TF_IDF_SelectedFeatures2
X_train, X_test, Y_train, Y_test = train_test_split(DF_TF_IDF_SelectedFeatures2, y_train, test_size = .2)
print(X_train.shape)
print(X_test.shape)

(1656, 50)
(414, 50)


In [60]:
#Construct Decision Tree using split data with SelectKBest features where k=50
clf = DecisionTreeClassifier()
dt = clf.fit(X_train, Y_train)
clf_predict=clf.predict(X_test)
clf_predict_proba=clf.predict_proba(X_test)
clf_predict_proba=clf_predict_proba[:,1]
print("accuracy Score (training) for Decision Tree:{0:6f}".format(clf.score(X_test,Y_test)))
print()
print("Confusion Matrix for Decision Tree")
print(confusion_matrix(Y_test,clf_predict))

accuracy Score (training) for Decision Tree:0.874396

Confusion Matrix for Decision Tree
[[156  21]
 [ 31 206]]


In [61]:
#Construct Random Forest Model using split data with SelectKBest features where k=50
rfc = RandomForestClassifier()
rfc1 = rfc.fit(X_train, Y_train)
rfc_predict=rfc.predict(X_test)
rfc_predict_proba=rfc.predict_proba(X_test)
rfc_predict_proba=rfc_predict_proba[:,1]
print("accuracy Score (training) for Random Forest:{0:6f}".format(rfc.score(X_test,Y_test)))
print()
print("Confusion Matrix for Random Forest")
print(confusion_matrix(Y_test,rfc_predict))

accuracy Score (training) for Random Forest:0.888889

Confusion Matrix for Random Forest
[[153  24]
 [ 22 215]]


**FEATURE SELECTION - SelectFromModel

Using Random Forest Classifier with max_features = 25

In [62]:
#Feature Selection - Wrapper type - SelectFromModel
clf1 = RandomForestClassifier()
rf = clf1.fit(combined,y_train)
#print(dt.feature_importances_)
model1 = SelectFromModel(rf, prefit=True, max_features=25, threshold=-np.inf)

X_new1 = model1.transform(combined)
X_new_SelectedFeatures1= pd.DataFrame(X_new1)

In [63]:
#Split dataset X_new_SelectedFeatures1
X_train, X_test, Y_train, Y_test = train_test_split(X_new_SelectedFeatures1, y_train, test_size = .2)
print(X_train.shape)
print(X_test.shape)

(1656, 25)
(414, 25)


In [64]:
#Construct Decision Tree using split data with SelectFromModel
clf = DecisionTreeClassifier()
dt = clf.fit(X_train, Y_train)
clf_predict=clf.predict(X_test)
clf_predict_proba=clf.predict_proba(X_test)
clf_predict_proba=clf_predict_proba[:,1]
print("accuracy Score (training) for Decision Tree:{0:6f}".format(clf.score(X_test,Y_test)))
print()
print("Confusion Matrix for Decision Tree")
print(confusion_matrix(Y_test,clf_predict))

accuracy Score (training) for Decision Tree:0.826087

Confusion Matrix for Decision Tree
[[132  36]
 [ 36 210]]


In [65]:
#Construct Random Forest Model using split data with SelectFromModel
rfc = RandomForestClassifier()
rfc1 = rfc.fit(X_train, Y_train)
rfc_predict=rfc.predict(X_test)
rfc_predict_proba=rfc.predict_proba(X_test)
rfc_predict_proba=rfc_predict_proba[:,1]
print("accuracy Score (training) for Random Forest:{0:6f}".format(rfc.score(X_test,Y_test)))
print()
print("Confusion Matrix for Random Forest")
print(confusion_matrix(Y_test,rfc_predict))

accuracy Score (training) for Random Forest:0.871981

Confusion Matrix for Random Forest
[[139  29]
 [ 24 222]]


Using GradientBoostingClassifier with max_features = 25

In [76]:
#Feature Selection - Wrapper type - SelectFromModel
clf2 = GradientBoostingClassifier(n_estimators=50)
clf = clf2.fit(combined,y_train)
#print(clf.feature_importances_)
model2 = SelectFromModel(clf, prefit=True, max_features=25, threshold=-np.inf)

X_new2 = model2.transform(combined)
X_new_SelectedFeatures2= pd.DataFrame(X_new2)

In [77]:
#Split dataset X_new_SelectedFeatures1
X_train, X_test, Y_train, Y_test = train_test_split(X_new_SelectedFeatures2, y_train, test_size = .2)
print(X_train.shape)
print(X_test.shape)

(1656, 25)
(414, 25)


In [78]:
#Construct Decision Tree using split data with SelectFromModel
clf = DecisionTreeClassifier()
dt = clf.fit(X_train, Y_train)
clf_predict=clf.predict(X_test)
clf_predict_proba=clf.predict_proba(X_test)
clf_predict_proba=clf_predict_proba[:,1]
print("accuracy Score (training) for Decision Tree:{0:6f}".format(clf.score(X_test,Y_test)))
print()
print("Confusion Matrix for Decision Tree")
print(confusion_matrix(Y_test,clf_predict))

accuracy Score (training) for Decision Tree:0.864734

Confusion Matrix for Decision Tree
[[143  30]
 [ 26 215]]


In [79]:
#Construct Random Forest Model using split data with SelectFromModel
rfc = RandomForestClassifier()
rfc1 = rfc.fit(X_train, Y_train)
rfc_predict=rfc.predict(X_test)
rfc_predict_proba=rfc.predict_proba(X_test)
rfc_predict_proba=rfc_predict_proba[:,1]
print("accuracy Score (training) for Random Forest:{0:6f}".format(rfc.score(X_test,Y_test)))
print()
print("Confusion Matrix for Random Forest")
print(confusion_matrix(Y_test,rfc_predict))

accuracy Score (training) for Random Forest:0.896135

Confusion Matrix for Random Forest
[[148  25]
 [ 18 223]]
