# Trabalho 2 de Aprendizagem Automática


In [48]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
import matplotlib.pylab as plt
import seaborn as sns
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from scipy.stats import uniform,randint
from sklearn.neural_network import MLPClassifier

In [None]:
# Load the training and test tables from CSV files in the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))
# Last expression of the cell: preview the first rows of the training table.
train.head()

In [16]:
# Training data: every column except the last is a feature, the last column
# is the label.
X_train, y_train = train.iloc[:, :-1].values, train.iloc[:, -1].values
# Keep the test identifiers separately for the submission files.
test_ID = test["id"].values
# The whole test table is used as the feature matrix, so its "id" column is
# part of the features.
# NOTE(review): presumably train also carries "id" among its feature columns
# so the shapes line up -- confirm, and consider dropping "id" from both.
X_test = test.values

#### Random Forest

In [None]:
# Candidate values for each random-forest hyper-parameter, declared in the
# same order in which they are placed in the search grid below.
n_estimators = [800, 900, 1000]
criterion = ['gini', 'entropy']
max_depth = [5, 10, 15]
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
max_features = ['log2', 'sqrt']
bootstrap = [True, False]

# Search grid keyed by RandomForestClassifier argument name.
param_grid = dict(
    n_estimators=n_estimators,
    criterion=criterion,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
    max_features=max_features,
    bootstrap=bootstrap,
)
print(param_grid)


In [42]:

# Base random-forest estimator handed to the grid search. It is built with no
# arguments, so no random_state is set and repeated runs are not seeded.
rf_Model = RandomForestClassifier()

In [None]:
# Exhaustive search over every combination in param_grid, scored with 3-fold
# cross-validation; n_jobs=-1 lets the fits run in parallel on all cores.
rf_grid_search = GridSearchCV(rf_Model, param_grid, cv=3, n_jobs=-1, verbose=2)
rf_grid_search.fit(X_train, y_train)
# Last expression of the cell: show the best parameter combination found.
rf_grid_search.best_params_

In [None]:
# Put the cross-validation results in a table and average the mean test score
# over all other hyper-parameters for each (max_depth, n_estimators) pair.
results_df = pd.DataFrame(rf_grid_search.cv_results_)
heatmap_data1 = pd.pivot_table(
    results_df,
    values='mean_test_score',
    index='param_max_depth',
    columns='param_n_estimators',
)

# Draw the annotated heatmap; title and axis labels are set after plotting so
# they replace the labels seaborn derives from the pivot table.
plt.figure(figsize=(10, 8))
sns.heatmap(heatmap_data1, annot=True, fmt=".4f", cmap='viridis').set(
    title="HeatMap n_estimators vs max_depth",
    xlabel='n_estimators',
    ylabel='max_depth',
)
plt.show()

In [None]:
# Keep the estimator that the random-forest grid search selected as best.
best_rf_model = rf_grid_search.best_estimator_

#### Gradient Boosting Classifier

In [None]:
# Search grid for the gradient-boosting classifier, keyed by
# GradientBoostingClassifier argument name.
param_grid2 = dict(
    learning_rate=[0.005, 0.01, 0.02],
    max_depth=[2, 3, 4],
    max_features=['sqrt', 'log2'],
    min_samples_leaf=[1, 2, 5],
    min_samples_split=[2, 5, 10],
    n_estimators=[900, 1000, 1200],
    subsample=[0.7, 0.8, 0.9],
)
print(param_grid2)

In [10]:
# Base gradient-boosting estimator handed to the grid search. It is built with
# no arguments, so no random_state is set and repeated runs are not seeded.
gbc_model = GradientBoostingClassifier()

In [None]:
# Exhaustive search over every combination in param_grid2, scored with 3-fold
# cross-validation; n_jobs=-1 lets the fits run in parallel on all cores.
gbc_grid = GridSearchCV(gbc_model, param_grid2, cv=3, n_jobs=-1, verbose=2)
gbc_grid.fit(X_train, y_train)
# Last expression of the cell: show the best parameter combination found.
gbc_grid.best_params_

In [37]:
# Keep the estimator that the gradient-boosting grid search selected as best.
best_gbc_model = gbc_grid.best_estimator_

#### Neural Network

In [None]:
# Search space for the multi-layer perceptron, keyed by MLPClassifier
# argument name. Every entry is a finite list of candidate values.
param_grid3 = dict(
    hidden_layer_sizes=[(100,), (100, 50), (100, 100), (150, 100, 50)],
    activation=['relu', 'tanh'],
    solver=['adam'],
    learning_rate_init=[0.001, 0.0001],
    batch_size=[32, 64, 128],
    learning_rate=['adaptive'],
)
print(param_grid3)


In [None]:
# Base multi-layer perceptron handed to the randomized search. It is built
# with no arguments, so no random_state is set on the network itself.
mlpc_model = MLPClassifier()

In [None]:
# param_grid3 holds only finite lists, so the number of distinct candidates is
# the product of the list lengths. Requesting more iterations than that makes
# scikit-learn emit a warning and evaluate each combination once anyway, so
# n_iter is capped at the size of the search space.
# NOTE: this size computation must be revisited if param_grid3 ever receives
# continuous distributions instead of lists.
mlp_search_space = int(np.prod([len(options) for options in param_grid3.values()]))

# Randomized search (seeded with random_state=42) scored with 3-fold
# cross-validation; n_jobs=-1 lets the fits run in parallel on all cores.
mlp_grid = RandomizedSearchCV(
    estimator=mlpc_model,
    param_distributions=param_grid3,
    cv=3,
    n_iter=min(200, mlp_search_space),
    random_state=42,
    verbose=2,
    n_jobs=-1,
)
mlp_grid.fit(X_train, y_train)
# Last expression of the cell: show the best parameter combination found.
mlp_grid.best_params_

In [None]:
# Keep the estimator that the MLP randomized search selected as best.
best_mlp_model = mlp_grid.best_estimator_

### Modelo Random Forest

In [None]:
# Final random forest, built with explicitly chosen hyper-parameters and
# fitted on the full training set.
RF = RandomForestClassifier(
    n_estimators=600,
    criterion='entropy',
    min_samples_split=5,
    min_samples_leaf=2,
    max_features='sqrt',
    max_depth=10,
    bootstrap=False,
)
RF.fit(X_train, y_train)
# Predict with the model fitted just above. The previous code predicted with
# best_rf_model, a different estimator that only exists if the grid-search
# cells were run, so the submission did not match the model reported here.
y_pred = RF.predict(X_test)
# The score is computed on the training data, so it is reported as training
# accuracy rather than test accuracy.
print(f'Exatidão de Treino -: {RF.score(X_train,y_train):.3f}')

### Modelo Gradient Boosting

In [None]:
# Final gradient-boosting classifier, built with explicitly chosen
# hyper-parameters and fitted on the full training set.
GBC = GradientBoostingClassifier(
    learning_rate=0.05,
    max_depth=5,
    max_features='sqrt',
    min_samples_leaf=2,
    min_samples_split=5,
    n_estimators=1000,
    subsample=0.8,
)
GBC.fit(X_train, y_train)
# Predictions for the submission file.
y_pred2 = GBC.predict(X_test)
# The score is computed on the training data, so it is reported as training
# accuracy rather than test accuracy.
print(f'Exatidão de Treino -: {GBC.score(X_train,y_train):.3f}')

### Criação do ficheiro de submissão

In [67]:
# Submission for the random-forest predictions: the test ids in an "id"
# column next to the predicted labels in a "floresta" column.
ids_for_test = test['id']
submissao = ids_for_test.to_frame(name="id").assign(floresta=y_pred)
# Write without the DataFrame index so the file has only the two columns.
submissao.to_csv('submissao4.csv', index=False)



In [27]:
# Submission for the gradient-boosting predictions: the test ids in an "id"
# column next to the predicted labels in a "floresta" column.
ids_for_test = test['id']
submissaoGBC = ids_for_test.to_frame(name="id").assign(floresta=y_pred2)
# Write without the DataFrame index so the file has only the two columns.
submissaoGBC.to_csv('submissaoGBC2.csv', index=False)