<a href="https://colab.research.google.com/github/pgcd2296/Pokemon_GO/blob/main/Pokemon_Go.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Train.csv

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn import tree
from imblearn.over_sampling import SMOTE

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
df = pd.read_csv('/content/drive/MyDrive/Pokemon/train.csv')

In [None]:
def covert_binary (dt): 
    '''Convert all True and False into binary'''

    dt.replace({False: 0, True: 1}, inplace=True)

    return dt

In [None]:
data = covert_binary(df)

In [None]:
def data_dummies (dt): 
    ''' Creating dummies to fit categorical data into our model'''

    new = dt[['appearedTimeOfDay','appearedDayOfWeek','city','continent','weather','weatherIcon']]
    df_categ_dummies = pd.get_dummies(new,drop_first=True)
    dt.drop(['appearedTimeOfDay','appearedDayOfWeek','city','continent','weather','weatherIcon'], axis = 1, inplace = True)
    join = pd.concat([dt,df_categ_dummies], axis=1)
    
    
    return join

In [None]:
poke = data_dummies(data)

In [None]:
def model_prep (dt): 
    '''Dropping ID columns and full date'''

    dt.drop(['pokemonId','appearedLocalTime','_id'], axis = 1, inplace = True)
 
    return dt

In [None]:
pokemon = model_prep(poke)

In [None]:
def ss_columns (dt): 
    '''Selecting columns for Standar Scaler'''

    sc = dt[['windSpeed','population_density','gymDistanceKm', 'pokestopDistanceKm','temperature','pressure']]
 
    return sc

In [None]:
pokemon_ss = ss_columns(pokemon)

In [None]:
def drop_to_prep (dt): 
    '''Dropping pokemon_ss, dropping time columns that can create collinearity and dropping cellid columns  '''

    dt.drop(['cellId_90m',	'cellId_180m',	'cellId_370m',	'cellId_730m',	'cellId_1460m',	'cellId_2920m',	'cellId_5850m','windBearing','latitude', 'longitude', 'terrainType','sunriseMinutesMidnight','sunriseHour','sunriseMinute','sunriseMinutesSince','sunsetMinutesMidnight','sunsetHour','sunsetMinute','sunsetMinutesBefore','appearedTimeOfDay_evening','appearedTimeOfDay_morning','appearedTimeOfDay_night','appearedDayOfWeek_Saturday','appearedDayOfWeek_Sunday','appearedDayOfWeek_Thursday','appearedDayOfWeek_Tuesday','appearedDayOfWeek_Wednesday','appearedDayOfWeek_dummy_day','windSpeed','population_density','gymDistanceKm', 'pokestopDistanceKm','temperature','pressure'], axis = 1, inplace = True)
    
    return dt

In [None]:
pokemon = drop_to_prep(pokemon)

In [None]:
def pokemon_scaler (dt): 
    '''Using standard scaler to standarize columns'''

    ss = StandardScaler()
    df_scaled = pd.DataFrame(ss.fit_transform(dt),columns = dt.columns)
    
    return df_scaled

In [None]:
pokemon_standard = pokemon_scaler(pokemon_ss)

In [None]:
def concat_data (dt): 
    '''Concatenating standarized data with rest of dataframe'''

    conc = pd.concat([pokemon,dt], axis=1)
    
    return conc

In [None]:
pokemon_concat = concat_data(pokemon_standard)

In [None]:
def drop_class (dt): 
    '''Drop class for PCA & Classifier'''

    dt.drop(columns=['class'], axis = 1, inplace = True)
    
    return dt

In [None]:
pokemon_concat = drop_class(pokemon_concat)

In [None]:
def Full_PCA (dt): 
    '''Entire PCA process'''

    pca = PCA().fit(dt)
    pca.n_components_
    pca = PCA(n_components=10).fit(dt)
    transformacion = pca.transform(dt)
    df_transformado = pd.DataFrame(transformacion)
    
    return df_transformado

In [None]:
 df_transform = Full_PCA(pokemon_concat) 

In [None]:
X = df_transform
y = pokemon['class']

In [None]:
sm = SMOTE() 
X_os,y_os= sm.fit_resample(X, y)

In [None]:
x_train, x_test, y_train, y_test = train_test_split(X_os, y_os, 
                                                    test_size=0.35)

In [None]:
forest = RandomForestClassifier()
forest.fit(x_train, y_train)

RandomForestClassifier()

In [None]:
y_pred = forest.predict(x_test)
df_predict = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})

In [None]:
uniqueValues = df_predict['Predicted'].unique() 

In [None]:
class_names = ['19',  '96',  '16',  '10',  '13', '129',  '41',  '23',  '43', '133']
print(classification_report(y_test, y_pred, target_names=class_names))

              precision    recall  f1-score   support

          19       1.00      1.00      1.00      5598
          96       0.95      1.00      0.97      5680
          16       0.93      0.88      0.91      5547
          10       0.93      0.89      0.91      5655
          13       0.99      1.00      0.99      5576
         129       0.99      1.00      1.00      5486
          41       1.00      1.00      1.00      5668
          23       0.99      1.00      0.99      5642
          43       0.99      1.00      1.00      5576
         133       0.99      1.00      1.00      5677

    accuracy                           0.98     56105
   macro avg       0.98      0.98      0.98     56105
weighted avg       0.98      0.98      0.98     56105



# Test.csv

In [None]:
test = pd.read_csv('/content/drive/MyDrive/Pokemon/test.csv')

In [None]:
data2 = covert_binary(test)

In [None]:
poke2 = data_dummies(data2)

In [None]:
poke2.drop(['appearedLocalTime','_id'], axis = 1, inplace = True)

In [None]:
pokemon2 = poke2

In [None]:
pokemon2_ss = ss_columns(pokemon2)

In [None]:
pokemon2 = drop_to_prep(pokemon2)

In [None]:
pokemon_standard2 = pokemon_scaler(pokemon2_ss)

In [None]:
pokemon_concat2 = concat_data(pokemon_standard2)

In [None]:
pokemon_concat2 = drop_class(pokemon_concat2)

In [None]:
def clean_dataset(df):
    "Correcting inf values"
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    return df[indices_to_keep].astype(np.float64)

In [None]:
pokemon_concat_clean =  clean_dataset(pokemon_concat2)

In [None]:
df_transform2 = Full_PCA(pokemon_concat_clean) 

In [None]:
y_pred = forest.predict(df_transform2)

In [None]:
df_predicted = pd.DataFrame({'Predicted': y_pred})

In [None]:
def pred_csv(dt):
    '''Exporting test model to csv'''
    dt.to_csv('pred.csv')

    return dt

In [None]:
pred_csv(df_predicted) 

Unnamed: 0,Predicted
0,16
1,133
2,16
3,16
4,16
...,...
9997,96
9998,16
9999,16
10000,16
