<a href="https://colab.research.google.com/github/renatoIFPB/Curso_IA/blob/main/projeto_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import random
import seaborn as sns
from seaborn import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn import metrics
import matplotlib.pyplot as plt
sns.set_theme(style="whitegrid")

# Load seaborn's 'taxis' dataset, discard every row that contains a missing
# value, then remove the 'pickup' and 'dropoff' columns.
df = (
    load_dataset('taxis')
    .dropna(how='any', axis=0)
    .drop(['pickup', 'dropoff'], axis=1)
)

# column_headers = list(df.columns.values)
# print("The Column Header :", column_headers)

In [None]:
# Count the rows for each pickup borough; the result has one row per borough
# with the count stored in `total_pickup_borough`.
pickup_borough_zone = (
    df.groupby(["pickup_borough"], as_index=False)
      .agg(total_pickup_borough=("pickup_borough", "count"))
)

# NOTE: a notebook cell only renders its last expression, so this bare name
# does not display the table from here.
pickup_borough_zone

# Bar chart of the per-borough counts.
sns.barplot(
    data=pickup_borough_zone,
    x="pickup_borough",
    y="total_pickup_borough",
)

In [None]:
# Count the rows for each payment type. The count is taken over the
# `pickup_borough` column and keeps the column name `total_pickup_borough`.
pickup_borough_type_payment = (
    df.groupby(["payment"], as_index=False)
      .agg(total_pickup_borough=("pickup_borough", "count"))
)

# NOTE: a notebook cell only renders its last expression, so this bare name
# does not display the table from here.
pickup_borough_type_payment

# Bar chart of the per-payment-type counts.
sns.barplot(
    data=pickup_borough_type_payment,
    x="payment",
    y="total_pickup_borough",
)

In [None]:
# Sum the `total` column for each payment type into `total_money`.
pickup_borough_payment = (
    df.groupby(["payment"], as_index=False)
      .agg(total_money=("total", "sum"))
)

# NOTE: a notebook cell only renders its last expression, so this bare name
# does not display the table from here.
pickup_borough_payment

# Bar chart of the summed totals per payment type.
sns.barplot(
    data=pickup_borough_payment,
    x="payment",
    y="total_money",
)

In [5]:
# Integer-encode 'payment', 'pickup_zone' and 'dropoff_zone' in place.
# A fresh LabelEncoder is fitted per column, directly on that column, instead
# of first copying each column into a throwaway single-column DataFrame.
for column in ('payment', 'pickup_zone', 'dropoff_zone'):
  labelencoder = LabelEncoder()
  df[column] = labelencoder.fit_transform(df[column])

# One-hot encode 'color', 'pickup_borough' and 'dropoff_borough'; every other
# column is passed through unchanged. sparse_threshold=0 makes the transformer
# always return a dense array, so wrapping it in a DataFrame below does not
# depend on how sparse the one-hot part happens to be.
column_transformer = make_column_transformer(
    (OneHotEncoder(), ['color', 'pickup_borough', 'dropoff_borough']),
    remainder='passthrough',
    sparse_threshold=0,
)
encoded = column_transformer.fit_transform(df)
columns_names = column_transformer.get_feature_names_out()

# `df` is rebound to the encoded frame, whose column names come from the
# transformer (passed-through columns appear as 'remainder__<name>').
# NOTE(review): because `df` is overwritten, this cell presumably cannot be
# re-run on its own; re-run the loading cell first.
df = pd.DataFrame(data=encoded, columns=columns_names)

In [None]:
# Feature matrix: every column of the encoded frame except the target.
X = df.drop(columns=['remainder__payment'])
# Target vector: the label-encoded payment column.
y = df['remainder__payment']

In [None]:
from sklearn import tree

# Compare two decision-tree configurations over repeated random stratified
# 80/20 splits and print each configuration's mean test accuracy in percent.
n_runs = 10  # number of train/test repetitions that are averaged
count1 = 0   # running sum of model1 accuracies (percent)
count2 = 0   # running sum of model2 accuracies (percent)
for i in range(n_runs):
  # New 80% train / 20% test split on every run, stratified on the target.
  # The split seed is drawn at random, so results vary between executions.
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.2, random_state=random.randint(1, 50000), stratify=y)

  model1 = tree.DecisionTreeClassifier()  # constructed with no arguments
  model2 = tree.DecisionTreeClassifier(criterion="log_loss", splitter="random")

  model1 = model1.fit(X_train, y_train)
  model2 = model2.fit(X_train, y_train)

  result1 = model1.predict(X_test)
  # Fix: model2 must be scored on its own predictions. Previously model1 was
  # used for both, so the second average merely duplicated the first.
  result2 = model2.predict(X_test)

  # accuracy_score takes (y_true, y_pred).
  acc1 = metrics.accuracy_score(y_test, result1)
  acc2 = metrics.accuracy_score(y_test, result2)

  count1 += float(acc1 * 100)
  count2 += float(acc2 * 100)

print('Media:', f'{count1 / n_runs:.3f}%')  # mean accuracy of model1 over all runs
print('Media:', f'{count2 / n_runs:.3f}%')  # mean accuracy of model2 over all runs

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Compare two k-nearest-neighbours configurations over 10 random stratified
# 80/20 splits and print each configuration's mean test accuracy in percent.
count1 = 0
count2 = 0
for i in range(10):
  # The split seed is drawn at random for every repetition.
  split_seed = random.randint(1, 50000)
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.2, random_state=split_seed, stratify=y)  # 80% train, 20% test

  # model1: 11 neighbours with brute-force search.
  # model2: 3 neighbours with the Euclidean metric.
  model1 = KNeighborsClassifier(n_neighbors=11, algorithm='brute').fit(X_train, y_train)
  model2 = KNeighborsClassifier(n_neighbors=3, metric='euclidean', algorithm='auto').fit(X_train, y_train)

  # Add this repetition's test accuracy, expressed in percent.
  count1 += float(metrics.accuracy_score(model1.predict(X_test), y_test) * 100)
  count2 += float(metrics.accuracy_score(model2.predict(X_test), y_test) * 100)

print('Media:', f'{count1 / 10:.3f}%')  # mean over the 10 repetitions, model1
print('Media:', f'{count2 / 10:.3f}%')  # mean over the 10 repetitions, model2

In [None]:
from sklearn import svm

# Compare two support-vector classifiers over 10 random stratified 80/20
# splits and print each one's mean test accuracy in percent.
count1 = 0
count2 = 0
for i in range(10):
  # 80% train / 20% test, stratified on the target, with a random split seed.
  X_train, X_test, y_train, y_test = train_test_split(
      X, y,
      test_size=0.2,
      random_state=random.randint(1, 50000),
      stratify=y,
  )

  model1 = svm.SVC(kernel='linear')
  model2 = svm.SVC(kernel='sigmoid', C=2, gamma="auto")

  # Fit both classifiers on the same training split, model1 first.
  for classifier in (model1, model2):
    classifier.fit(X_train, y_train)

  # Predict on the held-out split and score each classifier.
  result1, result2 = (fitted.predict(X_test) for fitted in (model1, model2))
  acc1, acc2 = (metrics.accuracy_score(predicted, y_test) for predicted in (result1, result2))

  count1 += float(acc1 * 100)
  count2 += float(acc2 * 100)

print('Media:', f'{count1 / 10:.3f}%')  # mean over the 10 repetitions, model1
print('Media:', f'{count2 / 10:.3f}%')  # mean over the 10 repetitions, model2

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Compare a random forest with a gradient-boosting classifier over 10 random
# stratified 80/20 splits and print each one's mean test accuracy in percent.
count1 = count2 = 0
for i in range(10):
  # Draw a random seed for this repetition's train/test split.
  seed = random.randint(1, 50000)
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.2, random_state=seed, stratify=y)  # 80% train, 20% test

  model1 = RandomForestClassifier(
      criterion="entropy", max_features="log2", min_samples_split=3)
  model2 = GradientBoostingClassifier(
      loss="exponential", learning_rate=0.5, criterion="squared_error")

  # Train both models on the same split, model1 first.
  model1.fit(X_train, y_train)
  model2.fit(X_train, y_train)

  # Score each model on the held-out split.
  acc1 = metrics.accuracy_score(model1.predict(X_test), y_test)
  acc2 = metrics.accuracy_score(model2.predict(X_test), y_test)

  # Accumulate as percentages.
  count1 += float(acc1 * 100)
  count2 += float(acc2 * 100)

print('Media:', f'{count1 / 10:.3f}%')  # mean over the 10 repetitions, model1
print('Media:', f'{count2 / 10:.3f}%')  # mean over the 10 repetitions, model2