In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model as lm
from sklearn import metrics as m
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

def convert_to_dataframe(file):
    return pd.read_csv(file)

def get_features(data):
    random.seed()
    pred = data[:]
    pred.drop(['Status'],axis=1,inplace=True)
    pred.drop(['Event'],axis=1,inplace=True)    
    pred.drop(['Latitude'],axis=1,inplace=True)
    pred.drop(['Longitude'],axis=1,inplace=True)
    pred.drop(['ID'],axis=1,inplace=True)
    pred.drop(['Name'],axis=1,inplace=True)
    pred.drop(['Time'],axis=1,inplace=True)
    pred.drop(['Date'],axis=1,inplace=True)
    return pred.columns

def randomforest(data):
    x=data[get_features(data)]
    y=data['Status']
    x_train, x_test,y_train, y_test = train_test_split(x,y, test_size=0.2)
    rfc = RandomForestClassifier(n_estimators=80)
    rfc.fit(x_train,y_train)
    hurricane_prediction = rfc.predict(x_test)
    # Make predictions using the testing set
    final = pd.DataFrame({'Actual': y_test, 'Predicted': hurricane_prediction})
    print(final.head(5))
    return m.accuracy_score(hurricane_prediction,y_test)*100
      
def decisiontree(data):
    x=data[get_features(data)]
    y=data['Status']
    x_train, x_test,y_train, y_test = train_test_split(x,y, test_size=0.2)
    dtc = tree.DecisionTreeClassifier()
    dtc.fit(x_train,y_train)
    y_predict = dtc.predict(x_test)
    # Make predictions using the testing set
    final = pd.DataFrame({'Actual': y_test, 'Predicted': y_predict})
    print(final.head(5))
    return m.accuracy_score(y_predict,y_test)*100

if __name__=='__main__':
    file1 = convert_to_dataframe('pacific.csv')
    resf = randomforest(file1)
    res= decisiontree(file1)
    print ( "Random Forest Classifier",resf)
    print ( "Decision Tree Classifier",res)
    file1.Status = pd.Categorical(file1.Status,ordered=True)
    file1.Status = file1.Status.cat.codes
    resf = randomforest(file1)
    res= decisiontree(file1)
    print ( "Random Forest Classifier with categorical data",resf)
    print ( "Decision Tree Classifier with categorical data",res)
    file2 = convert_to_dataframe('atlantic.csv')
    resf = randomforest(file2)
    res = decisiontree(file2)
    print ( "Random Forest Classifier",resf)
    print ( "Decision Tree Classifier",res)
    # Pre-Processing
    file2 = convert_to_dataframe('atlantic.csv')
    file2.Status = pd.Categorical(file2.Status,ordered=True)
    file2.Status = file2.Status.cat.codes
    resf = randomforest(file2)
    res= decisiontree(file2)
    print ( "Random Forest Classifier with categorical data",resf)
    print ( "Decision Tree Classifier with categorical data",res)   

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


      Actual Predicted
7147      TS        TS
23540     TD        TD
14306     TS        TS
6161      TS        TS
9818      TS        TS


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


      Actual Predicted
4308      TS        TS
4124      TS        TS
14532     TD        TD
1377      TD        TD
23067     TD        TD
Random Forest Classifier 95.60061208875287
Decision Tree Classifier 95.50497322111707


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


       Actual  Predicted
7584       10         10
15876      10         10
17176      11         11
21972       5          5
22783      11         11


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


       Actual  Predicted
9995        3          3
12921      11         11
22939       3          3
21008      10         10
1200        3          3
Random Forest Classifier with categorical data 95.86840091813313
Decision Tree Classifier with categorical data 95.90665646518745


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


      Actual Predicted
30019     TS        TS
10999     TS        TS
18666     HU        HU
15371     EX        HU
45471     HU        HU


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


      Actual Predicted
17075     EX        TD
42020     TD        TD
4483      HU        HU
34559     TS        TS
41494     HU        HU
Random Forest Classifier 88.07657061399043
Decision Tree Classifier 87.98493025150188


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


       Actual  Predicted
38442       6          6
14962       6          6
22277       7          7
14305       6          6
6767        2          2


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


       Actual  Predicted
7383        2          2
11800       7          7
21918       2          2
3134        7          7
7319        2          2
Random Forest Classifier with categorical data 88.58568373892679
Decision Tree Classifier with categorical data 87.27217187659097
