Pandelis Ziazopoulos  (larryziazo@gmail.com) - Data Scientist / Physicist


## Download the data and set up the appropriate environment

In [None]:
# Install the kaggle package, then upload the kaggle.json file from your computer
# (the API token you created beforehand).
! pip install -q kaggle
from google.colab import files
files.upload()

In [None]:
# make a directory and download the data files for this kaggle competition
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle
! chmod 600 ~/.kaggle/kaggle.json

#if another competition then change the last argument of the command below
! kaggle competitions download -c titanic
! ls

In [None]:
# import the basic libraries to open and take a look at the data
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import ShuffleSplit, learning_curve, train_test_split
from sklearn.metrics import f1_score, log_loss, accuracy_score, balanced_accuracy_score, confusion_matrix, classification_report
import math
from collections import Counter


## Get to know the data - take a quick look

In [None]:
# Read the training data and the final (prediction) data with pandas and remember their sizes:
# N = number of rows in train.csv, NN = number of rows in test.csv
train_set, prediction_set = pd.read_csv("train.csv"), pd.read_csv("test.csv")
N, NN = len(train_set), len(prediction_set)

In [None]:
# preview the first rows of the training data
train_set.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# Headline numbers of the training set: size, missing cabins and passengers per embarkation port
embarked_counts = Counter(list(train_set['Embarked']))

print("Number of passengers : ", N)
print()

print("Passenger without validated cabin for their name : ", train_set['Cabin'].isna().sum() )
print()
for label, port in (("People embarked from Cherbourg : ", 'C'),
                    ("People embarked from Queenstown : ", 'Q'),
                    ("People embarked from Southampton: ", 'S')):
  print(label, embarked_counts[port])
# every passenger that is not counted under C, Q or S has no embarkation port recorded
print("People with no embarkation port : ", N - embarked_counts['S'] - embarked_counts['Q'] - embarked_counts['C'])

Number of passengers :  891

Passenger without validated cabin for their name :  687

People embarked from Cherbourg :  168
People embarked from Queenstown :  77
People embarked from Southampton:  644
People with no embarkation port :  2


In [None]:
# define a function that will help you plot the variables in order to see the balance (or imbalance) of the variables
def plot_bar(dictionary, title):
  """Draw a bar chart with one bar per entry of `dictionary`.

  dictionary : mapping (e.g. a Counter) whose keys are the bar positions and whose values are the bar heights
  title      : text used as the title of the plot
  Returns None; the chart is drawn through the pyplot interface (`plt`).
  """
  plt.bar(dictionary.keys(), dictionary.values())
  plt.title(title)


In [None]:
# number of training passengers per ticket class (column 'Pclass')
plot_bar(Counter(list(train_set['Pclass'])), "Distribution of ticket classes")

In [None]:
# number of training passengers per sex (column 'Sex')
plot_bar(Counter(list(train_set['Sex'])), "Distribution of sexes")

In [None]:
# number of training passengers per value of the target column 'Survived'
plot_bar(Counter(list(train_set['Survived'])), "Distribution of the survival")

In [None]:
# number of training passengers per value of the column 'SibSp'
plot_bar(Counter(list(train_set['SibSp'])), "Distribution of siblings / spouses aboard on the ship")

In [None]:
# number of training passengers per value of the column 'Parch'
plot_bar(Counter(list(train_set['Parch'])), "Distribution of parents / children aboard on the ship")

In [None]:
# x-data
def from_panda_to_x(dataframe):
  """Turn the feature columns of a passenger dataframe into numpy arrays.

  dataframe : pandas DataFrame containing the columns
              Pclass, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin and Embarked

  Returns a 10-tuple: the nine arrays in the column order above, followed by
  a list holding those same nine arrays.
  """
  feature_columns = ('Pclass', 'Sex', 'Age', 'SibSp', 'Parch',
                     'Ticket', 'Fare', 'Cabin', 'Embarked')
  # go through a plain list first, exactly one array per column
  x_data = [np.array(list(dataframe[column])) for column in feature_columns]
  return (*x_data, x_data)

# lower-case names: features of the training set
(x1, x2, x3, x4, x5, x6, x7, x8, x9, x_data) = from_panda_to_x(train_set)
# labels of the training set
y = np.array(train_set['Survived'].tolist())


# CAPS names: the same features for the final test set
# (there are no y-data for the final test set in this task)
(X1, X2, X3, X4, X5, X6, X7, X8, X9, X_data) = from_panda_to_x(prediction_set)


In [None]:
# check the types of all values of the variables in order to see if all values are of the same type
def variable_type_checker(variable):
  """Check whether all values of a variable share the type of its first value.

  variable : indexable sequence of values (e.g. a list or a numpy array)

  Returns (mismatches, seen_types):
    mismatches : number of values whose type differs from the type of variable[0]
    seen_types : the distinct types encountered, in order of first appearance
  """
  seen_types = []
  mismatches = 0
  for value in variable:
    value_type = type(value)
    # the first value of the variable is the reference
    if value_type != type(variable[0]):
      mismatches += 1
    if value_type not in seen_types:
      seen_types.append(value_type)
  return mismatches, seen_types


In [None]:
# type report for every variable of the training set
for train_variable in x_data:
  print(variable_type_checker(train_variable))

# two empty lines between the two reports
print("\n")

# type report for every variable of the final test set
for test_variable in X_data:
  print(variable_type_checker(test_variable))


## Data cleaning - setting the right format for the data to be processed

In [None]:
import re
# Reduce every training ticket to a number: the LAST run of digits found in the ticket string
# (e.g. "A/5 21171" -> 21171). Tickets without any digit are recorded as problematic.
x6_numerical = []
problems = [] # keep track of the problematic values (the indices)
counter = 0
for i in range(N):
  # raw string: '\d' inside a normal string literal is an invalid escape sequence
  ticket_number = re.findall(r'\d+', x6[i])
  if len(ticket_number) > 0 :
    x6_numerical.append(int(ticket_number[-1]))

  # keep track of the problematic values
  else:
    counter +=1
    problems.append(i)
    # keep the problematic value as it is, in order to keep the arrangement of the values between the different variables
    x6_numerical.append(x6[i])

print(len(x6) == len(x6_numerical)) # check that all are ok till now, in order to change the variable from x6 to x6_numerical
print()

print("Number of problems with the ticket number : ", len(problems))
print()

# show the full record of every problematic passenger (display limits are lifted globally)
pd.set_option("display.max_rows", None, "display.max_columns", None)
for i in problems:
  print(train_set.loc[i])
  print()

In [None]:
# Follow the same process for the final test data: keep the last run of digits of every ticket,
# and record the indices of tickets without any digit in PROBLEMS.
X6_numerical = []
PROBLEMS ,counter = [], 0
for i in range(NN):
  # raw string: '\d' inside a normal string literal is an invalid escape sequence
  ticket_number = re.findall(r'\d+', X6[i])
  if len(ticket_number) > 0 :
    X6_numerical.append(int(ticket_number[-1]))
  else:
    counter +=1
    PROBLEMS.append(i)
    # keep the original value so that positions stay aligned with the other variables
    X6_numerical.append(X6[i])
print(len(X6) == len(X6_numerical))


In [None]:
# Turn the string variable 'Sex' into integers: male -> 0, female -> 1.
# Any other value is only counted (nothing is appended for it), so the length
# comparison printed below reveals whether such values exist.
sex_codes = {'male': 0, 'female': 1}

x2_numerical = [sex_codes[value] for value in x2 if value in sex_codes]
counter = sum(1 for value in x2 if value not in sex_codes)
print(counter)
print(len(x2) == len(x2_numerical))

# same encoding for the final test data
X2_numerical = [sex_codes[value] for value in X2 if value in sex_codes]
counter = sum(1 for value in X2 if value not in sex_codes)
print(len(X2) == len(X2_numerical))


In [None]:
# Turn the embarkation port into integers: C -> 0, Q -> 1, S -> 2.
# Any other value is kept as it is (to preserve the alignment between variables)
# and the index of the passenger is recorded as problematic.
port_codes = {'C': 0, 'Q': 1, 'S': 2}

x9_numerical = []
counter = 0
for row in range(N):
  code = port_codes.get(x9[row])
  if code is None:
    counter += 1
    x9_numerical.append(x9[row])
    problems.append(row)
  else:
    x9_numerical.append(code)
print(counter)
print(len(x9) == len(x9_numerical))


# same encoding for the final test data (problematic indices go to PROBLEMS)
X9_numerical = []
counter = 0
for row in range(NN):
  code = port_codes.get(X9[row])
  if code is None:
    counter += 1
    X9_numerical.append(X9[row])
    PROBLEMS.append(row)
  else:
    X9_numerical.append(code)
print(len(X9) == len(X9_numerical))


In [None]:
# For convenience the float variables (age, fare) are rounded to integers; NaN values are kept as they are.
# (if the final accuracy turns out insufficient we will return to the actual float values)
x3_int = [age if np.isnan(age) else int(np.round(age)) for age in x3]
counter = sum(1 for age in x3 if np.isnan(age))
print("Number of people without recorded age : ", counter)
print()

x7_int = [fare if np.isnan(fare) else int(np.round(fare)) for fare in x7]
counter = sum(1 for fare in x7 if np.isnan(fare))
print("Number of people without recorded fare (zero fare {0$} is regarded as recorded fare) : ", counter)
print()


In [None]:
# Same rounding for the final test data: age and fare become integers, NaN values are kept
X3_int = [age if np.isnan(age) else int(np.round(age)) for age in X3]
counter = sum(1 for age in X3 if np.isnan(age))

X7_int = [fare if np.isnan(fare) else int(np.round(fare)) for fare in X7]
counter = sum(1 for fare in X7 if np.isnan(fare))

We see that there are plenty of people without a recorded age. We suspect that age is a crucial variable, so we want to drop neither the variable itself nor the passengers whose age is NaN. So, for every NaN value, we assign to that passenger an age picked at random from the recorded ages.

In [None]:
import random
# Replace every missing (NaN) age by an age drawn at random from the recorded ages of the same data set.
# A dedicated, seeded generator is used so that the imputed ages - and therefore everything computed
# from them - are identical on every run of the notebook, without touching the global random state.
AGE_IMPUTATION_SEED = 0
age_rng = random.Random(AGE_IMPUTATION_SEED)

# list of the recorded (non-NaN) ages, i.e. the pool the random ages are selected from
x3_int_nonzero = [age for age in x3_int if not np.isnan(age)]
for i in range(N):
  # find the nan values
  if np.isnan(x3_int[i]):
    x3_int[i] = age_rng.choice(x3_int_nonzero)

# Same for final test data
X3_int_nonzero = [age for age in X3_int if not np.isnan(age)]
for i in range(NN):
  if np.isnan(X3_int[i]):
    X3_int[i] = age_rng.choice(X3_int_nonzero)


For the time being, the variable x8 (cabin) will be skipped because of its many NaN values. If it turns out to be needed, we will take it into account later on.

In [None]:
# Assemble one table per data set. At this point every VARIABLE is a row and every passenger a column.
# Row order: 0 = Pclass, 1 = Sex, 2 = Age, 3 = SibSp, 4 = Parch, 5 = ticket number, 6 = Fare, 7 = Embarked,
# 8 = Survived (training set) / PassengerId (final test set). The cabin (x8 / X8) is left out.
all_train_data = np.array([x1,np.array(x2_numerical),np.array(x3_int),x4,x5,np.array(x6_numerical),np.array(x7_int),np.array(x9_numerical),np.array(y) ])
all_train_dataframe = pd.DataFrame(all_train_data)
# the variable PassengerId is taken into account only for the final test set, since it is needed only for the submission file we will create later
all_test_data = np.array([X1,np.array(X2_numerical),np.array(X3_int),X4,X5,np.array(X6_numerical),np.array(X7_int),np.array(X9_numerical), list(prediction_set['PassengerId'])])
all_test_dataframe = pd.DataFrame(all_test_data)

# display the assembled training table
all_train_dataframe


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524
,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890
0,3,1,3,1,3,3,1,3,3,2,3,1,3,3,3,2,3,2,3,3,2,2,3,1,3,3,3,1,3,3,1,1,3,2,1,1,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,1,2,1,1,2,3,2,3,3,1.0,1,3,1,3,2,3,3,3,2,3,2,3,3,3,3,3,2,3,3,3,3,1,2,3,3,3,1,3,3,3,1,3,3,3,1,1,2,2,3,3,1,3,3,3,3,3,3,3,1,3,3,3,3,3,3,2,1,3,2,3,2,2,1,3,3,3,3,3,3,3,3,2,2,2,1,1,3,1,3,3,3,3,2,2,3,3,2,2,2,1,3,3,3,1,3,3,3,3,3,2,3,3,3,3,1,3,1,3,1,3,3,3,1,3,3,1,2,3,3,2,3,2,3,1,3,1,3,3,2,2,3,2,1,1,3,3,3,2,3,3,3,3,3,3,3,3,3,1,3,2,3,2,3,1,3,2,1,2,3,2,3,3,1,3,2,3,2,3,1,3,2,3,2,3,2,2,2,2,3,3,2,3,3,1,3,2,1,2,3,3,1,3,3,3,1,1,1,2,3,3,1,1,3,2,3,3,1,1,1,3,2,1,3,1,3,2,3,3,3,3,3,3,1,3,3,3,2,3,1,1,2,3,3,1,3,1,1,1,3,3,3,2,3,1,1,1,2,1,1,1,2,3,2,3,2,2,1,1,3,3,2,2,3,1,3,2,3,1,3,1,1,3,1,3,1,1,3,1,2,1,2,2,2,2,2,3,3,3,3,1,3,3,3,3,1,2,3,3,3,2,3,3,3,3,1,3,3,1,1,3,3,1,3,1,3,1,3,3,1,3,3,1,3,2,3,2,3,2,1,3,3,1,3,3,3,2,2,2,3,3,3,3,3,2,3,2,3,3,3,3,1,2,3,3,2,2,2,3,3,3,3,3,3,3,2,2,3,3,1,3,2,3,1,1,3,2,1,2,2,3,3,2,3,1,2,1,3,1,2,3,1,1,3,3,1,1,2,3,1,3,1,2,3,3,2,1,3,3,3,3,2,2,3,1,2,3,3,3,3,2,3,3,1,3,1,1,3,3,3,3,1,1,3,3,1,3,1,3,3,3,3,3,1,1,2,1,3,3,3,3,1,1,3,1,2,3,2,3,1,3,3,1,3,3,2,1,3,2,2,3,3,3,3,2,1,1,3,1,1,3,3,2,1,1,2,2,3,2,1,2,3,3,3,1,1,1,1,3,3,3,2,3,3,3,3,3,3,3,2,1,1,3,3,3,2,1,3,3,2,1,2,1,3,1,2,1,3,3,3,1,3,3,2,3,2,3,3,1,2,3,1,3,1,3,3,1,2,1,3,3,3,3,3,2,3,3,2,2,3,1,3,3,3,1,2,1,3,3,1,3,1,1,3,2,3,2,3,3,3,1,3,3,3,1,3,1,3,3,3,2,3,3,3,2,3,3,2,1,1,3,1,3,3,2,2,3,3,1,2,1,2,2,2,3,3,3,3,1,3,1,3,3,2,2,3,3,3,1,1,3,3,3,1,2,3,3,1,3,1,1,3,3,3,2,2,1,1,3,1,1,1,3,2,3,1,2,3,3,2,3,2,2,1,3,2,3,2,3,1,3,2,2,2,3,3,1,3,3,1,1,1,3,3,1,3,2,1,3,2,3,3,3,2,2,3,2,3,1,3,3,3,1,3,1,1,3,3,3,3,3,2,3,2,3,3,3,3,1,3,1,1,3,3,3,3,3,3,1,3,2,3,1,3,2,1,3,3,3,2,2,1,3,3,3,1,3,2,1,3,3,2,3,3,1,3,2,3,3,1,3,1,3,3,3,3,2,3,1.0,3,2,3,3,3,1,3,3,3,1,3,2,1,3,3,3,3,3,2,1,3,3,3,1,2,3,1,1,3,3,3,2,1,3,2,2,2,1,3,3,3,1,1,3,2,3,3,3,3,1,2,3,3,2,3,3,2,1,3,1,3
1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,1,1,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,1,1,0,0,0,0,0,1,1,1,1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1.0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,1,1,1,1,1,1,0,0,0,0,1,0,0,0,1,1,0,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,1,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0,0,0,1,1,1,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,1,1,1,0,0,0,1,1,0,1,0,0,0,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,0,0,1,0,0,1,1,0,0,0,1,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,0,0,0,1,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,1,1,0,1,1,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,1,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,1.0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,1,0,1,0,0,1,0,1,1,0,0
2,22,38,26,35,35,26,54,2,27,14,4,58,20,39,14,55,2,24,31,38,35,34,15,28,8,38,33,19,43,4,40,20,24,66,28,42,23,21,18,14,40,27,18,3,19,26,48,38,24,18,7,21,49,29,65,56,21,28,5,11,22,38.0,45,4,58,47,29,19,17,26,32,16,21,26,32,25,17,3,1,30,22,29,22,28,17,33,16,28,23,24,29,20,46,26,59,24,71,23,34,34,28,26,21,33,37,28,21,38,38,31,47,14,22,20,17,21,70,29,24,2,21,54,32,32,54,12,19,24,24,45,33,20,47,29,25,23,19,37,16,24,24,22,24,19,18,19,27,9,36,42,51,22,56,40,44,51,16,30,32,27,44,40,26,17,1,9,22,45,17,28,61,4,1,21,56,18,16,50,30,36,18,19,9,1,4,16,11,45,40,36,32,19,19,3,44,58,18,42,22,24,28,28,34,46,18,2,32,26,16,40,24,35,22,30,48,31,27,42,32,30,16,27,51,4,38,22,19,20,18,24,35,29,59,5,24,34,44,8,19,33,63,17,29,22,30,44,25,24,37,54,43,29,62,30,41,29,51,30,35,50,27,3,52,40,39,36,16,25,58,35,58,25,41,37,15,63,45,14,7,35,65,28,16,19,29,33,30,22,42,22,26,19,36,24,24,48,24,2,23,50,31,22,19,32,30,1,18,17,30,30,24,18,26,28,43,26,24,54,31,40,22,27,30,22,28,36,61,36,31,16,22,46,38,16,44,4,29,41,45,45,2,24,28,25,36,24,40,25,3,42,23,28,15,25,48,28,22,38,18,27,40,29,45,35,50,30,60,56,22,24,25,18,19,22,3,21,22,27,20,19,42,1,32,35,0,18,1,36,4,17,36,21,28,23,24,22,31,46,23,28,39,26,21,28,20,34,51,3,21,25,36,23,33,29,44,50,34,18,30,10,54,21,29,28,18,7,28,19,50,32,28,23,42,17,50,14,21,24,64,31,45,20,25,28,58,4,13,34,5,52,36,20,30,49,24,29,65,21,50,29,48,34,47,48,49,38,24,56,4,1,16,38,33,23,22,32,34,29,22,2,9,18,50,63,25,36,35,58,30,9,20,21,55,71,21,33,54,32,25,24,17,21,15,37,16,18,33,20,28,26,29,17,36,54,24,47,34,18,36,32,30,22,22,44,9,40,50,20,39,23,2,40,17,22,30,7,45,30,58,22,36,9,11,32,50,64,19,7,33,8,17,27,22,22,22,62,48,70,39,36,30,40,28,42,54,24,19,29,51,32,62,53,36,28,16,19,34,39,14,32,25,39,54,36,24,18,47,60,22,32,35,52,47,9,37,36,25,49,19,49,24,22,1,44,35,36,30,27,22,40,39,30,35,26,35,24,34,26,4,26,27,42,20,21,21,61,57,21,26,21,80,51,32,22,9,28,32,31,41,36,20,24,2,36,1,48,19,56,24,23,39,18,21,36,18,24,33,32,23,58,50,40,47,36,20,32,25,36,43,18,40,31,70,31,17,18,24,18,43,36,49,27,20,14,60
,25,14,19,18,15,31,4,8,25,60,52,44,38,49,42,18,35,18,25,26,39,45,42,22,33,24,45,48,29,52,19,38,27,54,33,6,17,34,50,27,20,30,29,25,25,29,11,16,23,23,28,48,35,51,29,39,36,21,24,31,70,16,30,19,31,4,6,33,23,48,1,28,18,34,33,23,41,20,36,16,51,29,30,19,32,24,48,57,26,54,18,9,5,49,43,13,17,29,21,25,25,18,8,1,46,16,16,32,42,25,39,49,31,30,30,34,31,11,0,27,31,39,18,39,33,26,39,35,6,30,18,23,31,43,10,52,27,38,27,2,19,22,1,18,62.0,15,1,27,23,18,39,21,47,32,39,20,16,30,34,17,42,49,35,28,40,4,74,9,16,44,18,45,51,24,70,41,21,48,30,24,42,27,31,29,4,26,47,33,47,28,15,20,19,39,56,25,33,22,28,25,39,27,19,13,26,32
3,1,1,0,1,0,0,0,3,0,1,1,0,0,1,0,0,4,0,1,0,0,0,0,0,3,1,0,3,0,0,0,1,0,0,1,1,0,0,2,1,1,1,0,1,0,0,1,0,2,1,4,0,1,1,0,0,0,0,1,5,0,0.0,1,3,0,1,0,0,4,2,0,5,0,1,0,0,0,0,0,0,0,0,0,0,0,3,1,0,3,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,1,0,1,0,1,0,0,0,1,0,4,2,0,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,8,0,0,0,0,4,0,0,1,0,0,0,4,1,0,0,1,3,0,0,0,8,0,4,2,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,8,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,0,3,1,0,0,4,0,0,1,0,0,0,1,1,0,0,0,2,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,4,1,0,0,0,4,1,0,0,0,0,0,0,0,1,0,0,4,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,2,0,0,0,1,0,1,1,0,0,2,1,0,1,0,1,0,0,1,0,0,0,1,8,0,0,0,1,0,2,0,0,2,1,0,1,0,0,0,1,3,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,1,0,1,0,0,0,1,1,0,0,3,1,0,0,0,0,0,0,0,1,0,0,5,0,0,0,1,0,2,1,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,3,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,1,0,1,1,2,2,1,0,1,0,1,0,0,0,0,0,2,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,1,1,0,0,5,0,0,0,1,3,1,0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,1,0,1,0,0,0,0,0,0,0,0,4,4,1,1,0,1,0,1,1,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,1,2,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,1,1,2,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,1,0,0,3,0,2,1,0,0,0,0,0,0,0,0,0,2,0,1,0,0,2,0,0,0,1,2,0,0,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,5,1,1,4,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,3,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,2,1,0,1,1,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,4,1,0,0,0,8,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,4,0,0,0,1,0,3,1,0,0,0,4,0,0,0,0,0.0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,0,0,1,4,0,1,0,1,0,1,0,0,0,2,1,0,8,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,1,2,0,1,0,0,5,0,0,1,0,0,0,0,0,0,0,1,5,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,1,0,0,0,2,2,0,0.0,0,2,0,1,0,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,0,2,0,0,0,0,2,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,1,0,2,2,0,0,0,0,2,0,1,0,0,0,2,1,0,0,0,1,2,1,4,0,0,0,1,1,0,0,1,1,0,0,0,2,0,2,1,2,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,2,2,0,0,0,1,0,2,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,2,0,0,0,0,0,2,1,0,1,0,0,0,2,1,0,0,0,1,2,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,4,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,2,0,0,0,0,2,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,2,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,2,2,3,4,0,1,0,0,0,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,1,2,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,1,2,0,2,0,0,0,2,2,2,2,0,0,0,0,0,1,1,2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,2,0,1,0,0,0,0,0,2,0,1,0,0,0,0,1,0,0,0,0,0,0,0,2,0,5,0,0,0,0,2,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,5,0,0,0,2,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,6,1,0,0,0,2,1,2,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,2,0,0,1,1,0,0,0,1,1,0,0,2,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,1,0,0,0,2,0,0,0,1,2,0,0,0,2,0,0,0,0,0,0,1,0,1,2,1,0,0,0,0,0,0,0,0,0,2,0,0,0,1,0,2,1,0,0,1,1,0,0,2,0,0.0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,0,1,0,2,0,1,1,0,1,1,0,3,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,5,0,0,2,0,0
5,21171,17599,3101282,113803,373450,330877,17463,349909,347742,237736,9549,113783,2151,347082,350406,248706,382652,244373,345763,2649,239865,248698,330923,113788,349909,347077,2631,19950,330959,349216,17601,17569,335677,24579,17604,113789,2677,2152,345764,2651,7546,11668,349253,2123,330958,23567,370371,14311,2662,349237,3101295,39886,17572,2926,113509,19947,31026,2697,34651,2144,2669,113572.0,36973,347088,17605,2661,29395,3464,3101281,315151,33111,2144,14879,2680,1601,348123,349208,374746,248738,364516,345767,345779,330932,113059,14885,3101278,6608,392086,19950,343275,343276,347466,5734,2315,364500,374910,17754,17759,231919,244367,349245,349215,35281,7540,3101276,349207,343120,312991,349249,371110,110465,2665,324669,4136,2627,3101294,370369,11668,17558,347082,14879,54510,237736,27267,35281,2651,370372,17369,2668,347061,349241,3101307,3337,228414,29178,2133,11752,113803,7534,17593,2678,347081,3101279,365222,231945,33112,350043,6608,230080,244310,1166,113776,11206,851,265302,17597,35851,392090,315037,2343,371362,33595,347068,315093,3101295,363291,113505,347088,17318,1601,111240,382652,347742,3101280,17764,350404,4133,17595,250653,LINE,2343,2131,347077,230136,315153,113767,370365,111428,364849,349247,234604,28424,350046,230080,17610,17569,368703,4579,370370,248747,345770,2343,3101264,2628,3540,347054,3101278,2699,367231,112277,3101311,13528,21174,250646,367229,35273,3101283,243847,11813,14208,392089,220367,21440,349234,19943,4348,751,21173,236171,4133,36973,347067,237442,347077,29566,6609,26707,31921,28665,1585,2665,367230,14263,3101275,2694,19928,347071,250649,11751,244252,362316,347054,113514,3336,370129,2650,17585,110152,17755,230433,384461,347077,110413,112059,382649,17248,3101295,347083,17582,17760,113798,LINE,250644,17596,370375,13502,347073,239853,382652,2673,336439,347464,345778,10482,113056,349239,345774,349206,237798,370373,19877,11967,2163,349236,349233,17612,2693,113781,19988,17558,9234,367226,LINE,226593,2466,113781,17421,17758,3381,17485,11767,17608,25065
1,349243,13529,347470,244367,29011,36928,16966,21172,349219,234818,248738,2343,17760,345364,28551,363291,111361,367226,113043,17582,345764,17611,349225,113776,16966,7598,113784,230080,19950,248740,244361,229236,248733,31418,386525,37671,315088,7267,113510,2695,349237,2647,345783,113505,237671,330931,330980,347088,2167,2691,3101310,370365,7076,110813,2626,14313,17477,11765,3101267,323951,17760,349909,17604,7077,113503,2648,347069,17757,2653,3101293,113789,349227,14879,2144,27849,367655,1748,113760,350034,3101277,35273,9549,350052,350407,28403,244278,240929,3101289,341826,4137,3101279,315096,28664,347064,29106,312992,4133,349222,394140,19928,239853,3101269,343095,28220,250652,28228,345773,349254,13032,315082,347080,370129,34244,2003,250655,364851,392078,110564,376564,3085,3101274,13507,113760,6608,29106,19950,18723,13529,345769,347076,230434,65306,33638,250644,113794,2666,113786,34651,65303,113051,17453,2817,349240,13509,17464,13531,371060,19952,364506,111320,234360,2816,3101306,239853,113792,36209,2666,323592,315089,34651,541,7553,110465,31027,3460,350060,3101298,2144,239854,3594,4134,11967,4133,19943,11771,18509,37671,65304,3101317,113787,17609,45380,2627,36947,6212,113781,350035,315086,364846,330909,4135,110152,17758,26360,111427,4001,1601,382651,3101316,17473,17603,349209,36967,34260,371110,226875,349242,12749,349252,2624,111361,2700,367232,14258,17483,3101296,29104,26360,2641,2690,2668,315084,13529,113050,17761,364498,13568,5735,347082,347082,2908,17761,693,2908,2146,363291,33112,17421,244358,330979,2620,347085,113807,11755,17757,110413,345572,372622,349251,218629,392082,392087,48871,349205,349909,2686,350417,752,11769,17474,14312,20589,358585,243880,13507,2689,3101286,237789,17421,28403,13049,3411,110413,237565,13567,14973,3235,3101273,36947,3902,364848,29037,345773,248727,LINE,2664,17485,243847,349214,113796,364511,111426,349910,349246,113804,2123,17582,347082,3101305,367230,370377,364512,220845,347080,3336,230136,31028,2659,11753,2653,350029,54636,36963,219533
,13502,349224,334912,27042,347743,13214,112052,347088,237668,3101292,31921,3101295,376564,350050,17477,347088,1601,2666,17572,349231,13213,751,2314,349221,231919,8475,330919,365226,14879,349223,364849,29751,35273,17611,2623,5727,349210,3101285,14879,234686,312993,3536,19996,29750,12750,24580,244270,239856,349912,342826,4138,2144,17755,330935,17572,6563,2144,29750,2123,3101295,349228,350036,24160,17474,349256,1601,2672,113800,248731,363592,35852,17421,348121,17757,17475,2691,36864,350025,250655,223596,17476,113781,2661,17482,113028,19996,7545,250647,348124,17757,34218,36568,347062,248727,350048,12233,250643,113806,315094,31027,36866,236853,3101271,24160,2699,239855,28425,233639,54636,6608,17755,349201,349218,16988,19877,17608,376566,3101288,5735,2673,250648,113773,335097,29103,392096,345780,349204,220845,250649,350042,29108,363294,110152,358585,3101272,2663,113760,347074,13502,112379,364850,371110,8471,345781,350047,3,2674,29105,347078,383121,364516,36865,24160,2687,17474,113501,6607,3101312,374887,3101265,382652,2315,17593,12460,239865,2343,17600,349203,28213,17465,349244,2685,345773,250647,31921,113760,2625,347089,347063,112050,347087,248723,113806,3474,48871,28206,347082,364499,112058,3101290,2079,7075,347088,12749,315098,19972,392096,3101295,368323,1601,2079,367228,113572.0,2659,29106,2671,347468,2223,17756,315097,392092,1601,11774,3101287,3,113798,2683,315090,5547,2343,349213,248727,17453,347082,347060,2678,17592,244252,392091,36928,113055,2666,2629,350026,28134,17466,2343,233866,236852,2149,17590,345777,347742,349248,11751,695,345765,3381,2667,7534,349212,349217,11767,230433,349257,7552,34068,392076,382652,211536,112053,6607,111369,370376
6,7,71,8,53,8,8,52,21,11,30,17,27,8,31,8,16,29,13,18,7,26,13,8,36,21,31,7,263,8,8,28,147,8,10,82,52,7,8,18,11,9,21,8,42,8,8,16,8,22,18,40,8,77,26,62,36,10,7,28,47,7,80.0,83,28,28,15,10,8,8,9,10,47,74,14,56,8,8,8,29,12,9,10,8,47,10,16,34,8,263,8,8,8,61,21,7,8,35,63,23,26,8,8,77,9,8,8,8,8,8,24,52,14,8,10,14,8,8,21,248,31,74,8,30,13,77,11,8,7,22,7,8,7,14,26,13,15,26,53,9,79,15,8,16,7,12,37,8,34,26,13,13,67,8,14,7,61,8,8,9,70,16,16,8,9,40,21,55,28,26,56,34,29,11,8,31,8,25,29,13,0,70,15,31,39,22,50,16,27,16,8,13,13,8,26,28,147,8,8,8,13,10,70,6,7,8,10,16,19,8,31,7,21,7,13,8,113,8,27,76,10,8,13,8,8,90,9,10,7,13,25,83,8,14,31,10,8,26,26,10,12,14,16,10,7,7,90,8,14,53,26,7,10,27,16,20,15,79,86,512,26,8,31,80,0,8,10,40,8,153,136,31,0,20,30,8,78,8,0,29,20,8,8,10,8,26,9,10,8,13,8,79,91,13,9,8,28,7,152,30,248,8,23,0,12,8,152,111,109,24,57,83,262,26,8,26,8,26,14,165,134,7,8,12,29,70,136,6,13,21,58,23,28,153,18,134,8,67,134,8,36,26,263,13,13,13,13,13,16,16,9,9,35,7,18,7,10,55,13,8,8,28,28,14,7,16,7,75,7,8,69,55,6,8,136,21,82,7,212,4,8,228,16,8,52,8,74,47,13,8,12,120,8,8,113,17,8,8,26,10,13,8,8,10,16,9,21,8,19,8,25,8,7,90,0,8,8,32,13,13,24,8,8,8,14,20,7,26,26,8,8,27,16,26,7,56,120,34,19,263,10,26,10,8,13,8,82,20,27,19,30,28,20,28,89,8,8,27,52,10,8,27,8,38,13,8,7,0,27,8,19,7,9,28,14,10,52,21,7,8,12,47,0,8,10,91,25,90,30,8,16,20,7,30,50,8,14,78,15,152,8,9,8,8,10,86,109,26,27,23,56,8,8,26,59,7,34,10,24,26,8,94,8,7,58,7,8,10,222,8,12,26,7,7,22,9,26,27,106,14,50,71,31,31,26,106,26,26,14,21,37,111,26,8,7,8,27,40,228,80,17,8,8,14,8,8,24,8,21,7,8,10,51,26,8,8,14,13,56,14,8,30,111,26,40,9,80,15,79,8,8,7,78,7,8,26,24,33,0,7,57,27,8,42,8,27,16,8,30,42,153,31,7,16,8,8,65,14,16,39,10,14,53,16,8,16,32,12,78,8,8,30,7,30,0,28,13,8,26,40,16,8,69,28,56,19,77,8,36,8,8,8,23,8,8,7,74,8,16,13,113,134,7,26,7,8,74,13,8,8,52,39,52,10,13,0,8,8,10,47,512,8,77,9,47,39,42,40,10,8,211,57,13,56,7,27,14,8,8,111,8,228,26,14,8,8,26,14,26,152,15,50,27,52,9,13,8,228,10,16,8,33,7,13,13,53,9,21,8,26,8,211,19,0,13,13,16,
34,512,8,8,30,79,262,16,8,71,20,13,53,8,23,12,10,8,65,14,8,12,8,86,14,7,7,120,8,78,40,8,24,8,10,8,10,7,23,8,8,12,8,211,7,57,30,23,7,7,7,29,21,79,8,26,70,31,8,13,26,9,7,24,13,26,120,9,7,8,0,8,13,53,8,24,10,31,8,0,8,37,6,28,94,9,0,12,40,7,56,37,8,80.0,14,19,7,8,8,83,9,8,56,30,8,10,31,6,9,8,70,8,33,89,31,8,15,39,26,9,165,27,19,7,14,12,26,70,13,13,14,50,10,11,8,53,5,9,24,7,10,8,8,83,26,8,11,10,7,29,13,30,23,30,8
7,2,0,2,2,2,1,2,2,2,0,2,2,2,2,2,2,1,2,2,0,2,2,1,2,2,2,0,2,1,2,0,0,1,2,0,2,0,2,2,0,2,2,0,0,1,2,1,1,0,2,2,2,0,2,0,2,2,0,2,2,0,,2,2,0,0,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,2,2,2,2,2,2,2,2,2,2,2,1,2,0,2,2,0,2,1,2,0,2,2,2,0,2,2,0,1,2,0,2,0,2,2,2,2,0,2,2,2,0,0,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,0,2,2,0,2,2,2,0,2,2,2,2,1,2,1,2,2,2,2,2,0,0,1,2,1,2,2,2,2,0,2,2,2,0,1,0,2,2,2,2,1,0,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,1,2,2,0,1,2,2,2,2,2,2,2,2,2,0,0,2,0,2,1,2,2,2,1,2,2,2,2,2,2,2,2,0,1,2,2,2,1,2,1,2,2,2,2,0,2,2,2,1,2,0,0,2,2,0,0,2,2,0,1,1,2,1,2,2,0,0,0,0,0,0,2,2,2,2,2,2,2,0,2,2,1,2,2,0,2,2,2,0,1,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,0,2,2,2,1,1,2,0,0,2,1,2,0,0,1,0,0,2,2,0,2,0,2,0,0,2,0,0,2,2,2,2,2,2,1,0,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,2,2,2,0,1,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,0,0,2,0,2,2,2,1,2,2,2,2,2,2,2,2,1,0,2,2,2,0,2,2,2,2,2,2,2,2,2,2,0,2,2,0,2,2,2,2,2,0,2,0,0,2,2,2,2,1,1,2,2,0,2,2,2,2,1,2,2,0,2,2,2,1,2,2,2,2,0,0,0,1,2,2,2,2,2,0,0,0,2,2,2,0,2,0,2,2,2,2,0,2,2,0,2,2,0,2,1,0,2,2,0,0,2,2,1,2,2,2,2,2,2,2,0,2,2,2,2,1,2,2,2,2,0,2,2,0,2,0,0,2,2,0,2,2,2,0,2,1,2,2,2,2,0,0,2,2,2,2,0,2,2,2,0,2,2,2,1,1,2,2,2,2,2,2,0,2,0,2,2,2,1,2,2,1,2,2,0,2,2,2,2,2,2,2,2,0,2,2,0,0,2,0,2,2,2,2,2,1,1,2,2,1,2,0,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,1,0,2,2,2,0,2,2,2,2,2,0,2,0,2,2,2,1,0,2,0,2,0,1,2,2,2,2,2,0,0,2,2,2,2,2,0,2,1,2,2,2,2,2,2,2,2,1,2,2,2,0,2,2,2,2,2,0,2,2,2,2,0,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,0,1,1,2,2,2,2,0,2,2,1,2,1,2,0,2,2,2,2,2,2,1,2,0,1,2,2,0,2,2,2,2,0,2,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,1,2,0,1,,0,2,0,2,2,0,2,2,2,0,2,2,0,0,2,2,2,0,2,0,2,2,0,2,2,2,2,2,0,0,2,2,2,2,2,2,0,2,2,2,2,2,2,2,0,0,2,2,2,0,2,2,2,2,2,1,2,2,2,0,1
8,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,0,1,0,1,0,1,1,1,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,1,1,0,1,0,0,1.0,0,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,1,1,0,0,1,0,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,1,1,0,1,0,1,0,0,0,1,0,1,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,0,0,1,1,1,0,1,1,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0,1,1,1,1,0,0,0,0,0,0,1,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,1,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,1,0,1,1,1,0,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,0,0,1,0,1,0,0,1,0,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,0,0,1,1,0,1,1,0,0,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,1,1,0,1,1,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,1,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,1,1,1,0,1,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,1,1,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,1,1,0,0,1,1,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,1.0,1,1,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,1,1,1,1,0,0,0,1,0,0,1,1,0,0,1,0,1,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,1,0,1,0


In [None]:
# Remove all the problematic 'passengers' (columns with nan or other non-numeric values).
# `problems` / `PROBLEMS` are appended to by more than one cleaning cell, so the same index can
# appear twice (a passenger flagged for two variables, or a cleaning cell executed twice).
# Popping the same column twice raises a KeyError, hence the indices are de-duplicated first;
# errors="ignore" additionally makes re-running this cell a no-op instead of a failure.
train_columns_to_drop = sorted(set(problems))
test_columns_to_drop = sorted(set(PROBLEMS))
all_train_dataframe = all_train_dataframe.drop(columns=train_columns_to_drop, errors="ignore")
all_test_dataframe = all_test_dataframe.drop(columns=test_columns_to_drop, errors="ignore")

# shapes are (variables, remaining passengers) because the tables are not transposed yet
print(all_train_dataframe.shape)
print(all_test_dataframe.shape) # last row holds PassengerId instead of the y values


In [None]:
# Make rows columns and columns rows, then convert all values from string to integers.
all_train_dataframe = all_train_dataframe.T.astype(int)
# From the final test set we cannot drop the NaN values, since the submission
# file MUST have a specific length (exactly as the test set given), so they are
# replaced with 0 before the integer cast.
all_test_dataframe = all_test_dataframe.T.fillna(0).astype(int)

# The column labelled 8 of the training frame is split off as the y-values ...
y_data = all_train_dataframe.pop(8)
# ... and the column labelled 8 of the test frame holds the passenger ids,
# which are needed for the final submission.
passenger_id = all_test_dataframe.pop(8)
for remaining_shape in (all_train_dataframe.shape, y_data.shape):
  print(remaining_shape)

# 885

## Split the data - Prepare the classifiers



In [None]:
# splitting the data
def train_dev_test_splitter(X,Y,fraction=0.2,seed=1):
    """Split (X, Y) into a training part and a held-out part, stratified on Y.

    Parameters
    ----------
    X, Y : features and labels, forwarded unchanged to `train_test_split`.
    fraction : passed as `test_size` (share of the data that is held out).
    seed : passed as `random_state`, so a given seed reproduces the same split.

    Returns
    -------
    (x_train, y_train, x_heldout, y_heldout) - note that this order differs
    from the (x_train, x_test, y_train, y_test) order of `train_test_split`.
    """
    pieces = train_test_split(X, Y, test_size=fraction, random_state=seed, stratify=Y)
    x_train, x_heldout, y_train, y_heldout = pieces
    return x_train, y_train, x_heldout, y_heldout

#x_nontest, y_nontest, x_test, y_test = train_dev_test_splitter(all_train_dataframe, y_data, fraction=0.15,seed=2)


In [None]:
# import the classifiers you want to test (in our case the basic ones and the easiest to implement)
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# Each classifier is declared next to its display name, so the two parallel
# lists derived below always stay aligned. Disabled candidates are kept as
# comments so they can be switched back on.
_named_classifiers = [
    ("Logistic Regression", LogisticRegression(C=1, max_iter=200)),
    ("Nearest Neighbour", KNeighborsClassifier(10)),
    ("Decision Tree", DecisionTreeClassifier(max_depth=15)),
    ("Random Forest", RandomForestClassifier(max_depth=15, n_estimators=10, max_features=1)),
    # ("MLP", MLPClassifier(alpha=1, max_iter=1000)),
    ("AdaBoost", AdaBoostClassifier()),
    ("Naive Bayes", GaussianNB()),
    # SVM is a very "heavy" classifier -> makes the code slower
    # ("Linear SVM", SVC(kernel="linear", C=0.025)),
]

classifier_names = [label for label, _estimator in _named_classifiers]
classifiers = [estimator for _label, estimator in _named_classifiers]


In [None]:
# define a function that will iterate over the classifiers many times
# and as a result we will consider the mean of the scores for each classifier

def get_test_scores(x_data, y_data, classifiers , names, n_iterations=1):
  """Fit and score every classifier on `n_iterations` different random splits.

  For iteration i the data is split with `train_dev_test_splitter` using
  fraction=0.1 and seed=i; each classifier is fitted on the training part
  and scored on the held-out part.

  Parameters
  ----------
  x_data, y_data : features and labels, forwarded to `train_dev_test_splitter`.
  classifiers : estimators exposing `fit(X, y)` and `predict(X)`. They are
      fitted in place, so after the call they hold the fit of the last split.
  names : display names, paired with `classifiers` position by position.
  n_iterations : number of splits to evaluate.

  Returns
  -------
  (all_acc, all_f1) : two lists with one entry per iteration; every entry is
      the list of accuracy / F1 scores in classifier order.
  """
  all_acc, all_f1 = [], []
  for i in range(n_iterations):
    xtrain, ytrain, xtest, ytest = train_dev_test_splitter(x_data, y_data, fraction=0.1, seed=i)
    acc_scores, f1_scores = [], []
    for name, classifier in zip(names, classifiers):
      classifier.fit(xtrain, ytrain)
      predictions = classifier.predict(xtest)
      # the metrics expect (y_true, y_pred) in that order
      f1_scores.append(f1_score(ytest, predictions))
      acc_scores.append(accuracy_score(ytest, predictions))
      # always put a 'print message' when running many iterations in order to know
      # that the colab has not stopped running and that everything is good.
      # It is printed inside the classifier loop so that `name` is the classifier
      # that actually just finished (and is never read when there are none).
      print("the classification of ", name, " has finished for iteration ", i)
    all_f1.append(f1_scores)
    all_acc.append(acc_scores)
  return all_acc, all_f1


## Train the classifiers and get the scores 

In [None]:
acc, f1 = get_test_scores(all_train_dataframe, y_data, classifiers, classifier_names, n_iterations=20)
# Average every classifier's scores over the iterations and lay the result out
# directly in its final orientation: one row per classifier, columns F1 / Acc.
final_scores = pd.DataFrame(
    {'F1': np.mean(f1, axis=0), 'Acc': np.mean(acc, axis=0)},
    index=classifier_names,
)

In [None]:
# show the mean F1 and accuracy per classifier (one row per classifier)
final_scores

Unnamed: 0,F1,Acc
Logistic Regression,0.1035,0.628652
Nearest Neighbour,0.485434,0.654494
Decision Tree,0.726806,0.794382
Random Forest,0.722534,0.802247
AdaBoost,0.734598,0.798876
Naive Bayes,0.310034,0.66236


In [None]:
# so Random Forest and AdaBoost are the highest scoring classifiers

## Get the final predictions

In [None]:
# train again the highest scoring classifiers and then get their predictions in order to submit them
def get_data_return_predictions(classifier, train_data = None, ytrain = None, test_data = None):
  """Fit `classifier` on the training data and return its predictions for the test data.

  Parameters
  ----------
  classifier : estimator exposing `fit(X, y)` and `predict(X)`; fitted in place.
  train_data : training features; defaults to the notebook's `all_train_dataframe`.
  ytrain : training labels; defaults to the notebook's `y_data`.
  test_data : features to predict on; defaults to the notebook's `all_test_dataframe`.

  Returns
  -------
  Whatever `classifier.predict(test_data)` returns.

  The defaults are looked up when the function is CALLED, not when it is
  defined. If the preprocessing cells are re-run, the function therefore uses
  the current frames instead of the objects that existed at definition time.
  """
  if train_data is None:
    train_data = all_train_dataframe
  if ytrain is None:
    ytrain = y_data
  if test_data is None:
    test_data = all_test_dataframe
  classifier.fit(train_data, ytrain)
  return classifier.predict(test_data)

In [None]:
# the highest scoring classifiers - looked up by their display name rather than
# by position, so enabling/disabling an entry in the classifier list cannot
# silently select a different model
RandomForest_predictions = get_data_return_predictions(classifiers[classifier_names.index("Random Forest")])
AdaBoost_predictions = get_data_return_predictions(classifiers[classifier_names.index("AdaBoost")])


In [None]:
# check the format that the submission csv file MUST have
# (first rows of the sample submission file 'gender_submission.csv')
pd.read_csv('gender_submission.csv').head()


Unnamed: 0,PassengerId,Survived
0,892,0
1,893,1
2,894,0
3,895,0
4,896,1


In [None]:
# create the .csv file with the right format
RandomForest_submission = pd.DataFrame()
RandomForest_submission['PassengerId'] = passenger_id
RandomForest_submission['Survived'] = RandomForest_predictions
RandomForest_submission.to_csv('submission.csv', index=False)

! kaggle competitions submit -c titanic -f submission.csv -m "Message"


In [None]:
AdaBoost_submission = pd.DataFrame()
AdaBoost_submission['PassengerId'] = passenger_id
AdaBoost_submission['Survived'] = AdaBoost_predictions
AdaBoost_submission.to_csv('submission.csv', index=False)

! kaggle competitions submit -c titanic -f submission.csv -m "Message"
# 0.74641 ~ 150 position at the leaderboard


## One step further - Simple hyperparameter tuning for Random Forest classifier


In [None]:
# define the hyperparameter values
import itertools

criterion = ['gini', 'entropy']
max_depth = [None, 3, 5, 10, 15]
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 5]
min_weight_fraction_leaf = [0., 0.2]
max_leaf_nodes = [None, 2]
n_estimators = [10, 50, 100]

# grid search over all combinations - save the highest F1 scoring set
best_f1_score = 0
# bound up front so the following cells never hit an undefined name, even if
# no combination scores above 0
best_parameters = None
counter = 0
x_nontest, y_nontest, x_test, y_test = train_dev_test_splitter(all_train_dataframe, y_data, fraction=0.1, seed=1)

# itertools.product walks the grid in exactly the order of the equivalent nested
# for-loops (criterion outermost, n_estimators innermost)
hyperparameter_grid = itertools.product(criterion, max_depth, min_samples_split, min_samples_leaf,
                                        min_weight_fraction_leaf, max_leaf_nodes, n_estimators)

# NOTE(review): the forests are built without a random_state, so the winning
# combination and its score can differ from run to run.
for candidate in hyperparameter_grid:
  a, b, c, d, e, f, g = candidate
  gridsearch_classifier = RandomForestClassifier(criterion = a, max_depth = b, min_samples_split = c,
                                                 min_samples_leaf = d, min_weight_fraction_leaf = e,
                                                 max_leaf_nodes = f, n_estimators = g)
  gridsearch_classifier.fit(x_nontest, y_nontest)
  preds = gridsearch_classifier.predict(x_test)
  # kept in its own name: the global `f1` still holds the per-classifier score
  # lists returned by get_test_scores and must not be overwritten here
  candidate_f1 = f1_score(y_test, preds)
  # hold the set of hyperparameters that gave the best f1 score
  if best_parameters is None or candidate_f1 > best_f1_score:
    best_f1_score = candidate_f1
    best_parameters = candidate

  # in order to check that everything runs, and there is no problem (has not crashed, etc.)
  counter += 1
  if counter % 50 == 0:
    print(" All ok till now ! ", counter)


In [None]:
# take a quick look at the parameters: the best score, an empty line,
# then one hyperparameter value per line
print(best_f1_score, "", *best_parameters, sep="\n")

0.819672131147541

gini
15
5
1
0.0
None
50


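The F1 score went from 0.722534 (Random Forest, averaged over 20 random splits, before hyperparameter tuning) up to 0.819672 (the best grid-search configuration, measured on a single split) after it. Almost 10 percentage points HIGHER !!! Keep in mind that the tuned score was selected on the very split it is reported on, so it is an optimistic estimate.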

In [None]:
# train the classifier, this time knowing which hyperparameters give the highest scores.
# `best_parameters` stores the values in this fixed order, so pairing it with
# the keyword names rebuilds the constructor arguments.
_rf_parameter_names = ('criterion', 'max_depth', 'min_samples_split', 'min_samples_leaf',
                       'min_weight_fraction_leaf', 'max_leaf_nodes', 'n_estimators')
final_classifier = RandomForestClassifier(**dict(zip(_rf_parameter_names, best_parameters)))

# refit on the full training data, predict the test set and write the submission file
final_predictions = get_data_return_predictions(final_classifier)
final_submission = pd.DataFrame({
    'PassengerId': passenger_id,
    'Survived': final_predictions,
})
final_submission.to_csv('submission.csv', index=False)


In [None]:
# submit the submission.csv file to the titanic competition through the kaggle CLI
! kaggle competitions submit -c titanic -f submission.csv -m "Message"
# even better ! 0.75598 Score ~ 145 at the leaderboard


  0% 0.00/2.77k [00:00<?, ?B/s]100% 2.77k/2.77k [00:00<00:00, 13.2kB/s]
Successfully submitted to Titanic - Machine Learning from Disaster

In [None]:
# Last thought ... try hyperparameter tuning for the DecisionTree classifier as well