In [68]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
import gdown
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (mean_squared_error, r2_score, accuracy_score, confusion_matrix, f1_score, classification_report)
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


In [63]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [64]:
url = 'https://drive.google.com/uc?id=1p-TGJx4b6HK5JhotLcdtLF3mdJOUPiE8'
output = "SolarPrediction.csv"
gdown.download(url, output, quiet=False)

df = pd.read_csv(
    'SolarPrediction.csv',
    sep=',',
    na_values=['?'],
    low_memory=False,
    on_bad_lines='skip'
)

Downloading...
From: https://drive.google.com/uc?id=1p-TGJx4b6HK5JhotLcdtLF3mdJOUPiE8
To: /content/SolarPrediction.csv
100%|██████████| 2.96M/2.96M [00:00<00:00, 227MB/s]


In [None]:
print(df.columns.tolist())

['UNIXTime', 'Data', 'Time', 'Radiation', 'Temperature', 'Pressure', 'Humidity', 'WindDirection(Degrees)', 'Speed', 'TimeSunRise', 'TimeSunSet']


In [None]:
## CP002_001_SERS.pdf
## Parte 1

In [None]:
y = df["Radiation"]

In [None]:
X = df[["Temperature", "Pressure", "Humidity", "WindDirection(Degrees)", "Speed"]]


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
Models = {
    "LinearRegression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(random_state=0),
    "Random Forest": RandomForestRegressor(random_state=0, n_estimators=100)
}

In [None]:
for name, model in Models.items():
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)

  r2 = r2_score(y_test, y_pred)
  mse = mean_squared_error(y_test, y_pred)
  rmse = np.sqrt(mse)

  print(f"Modelo: {name}")
  print(f"R²: {r2: .2f}")
  print(f"MSE: {mse: .2f}")
  print(f"RMSE: {rmse: .2f}")


Modelo: LinearRegression
R²:  0.57
MSE:  43953.69
RMSE:  209.65
Modelo: Decision Tree
R²:  0.52
MSE:  48815.01
RMSE:  220.94
Modelo: Random Forest
R²:  0.74
MSE:  26056.46
RMSE:  161.42


In [None]:
## O modelo que melhor explica é o Random Forest, modelo não linear.

In [None]:
url = "https://drive.google.com/uc?id=1gB4AXCUOaJvMFZe--Xlo42q92_DhYuhq"
output = "T1.csv"
gdown.download(url, output, quiet=False)

df2 = pd.read_csv(
    'T1.csv',
    sep=',',
    na_values=['?'],
    low_memory=False,
    on_bad_lines='skip'
)

Downloading...
From: https://drive.google.com/uc?id=1gB4AXCUOaJvMFZe--Xlo42q92_DhYuhq
To: /content/T1.csv
100%|██████████| 3.97M/3.97M [00:00<00:00, 34.6MB/s]


In [None]:
print(df.columns.tolist())

['Date/Time', 'LV ActivePower (kW)', 'Wind Speed (m/s)', 'Theoretical_Power_Curve (KWh)', 'Wind Direction (°)']


In [None]:
## CP002_001_SERS.pdf
## Parte 2

In [None]:
threshold = 1000
df2["stability"] = (df2["LV ActivePower (kW)"] >= threshold).astype(int)

In [None]:
y = df2["stability"]

In [None]:
X = df2[['LV ActivePower (kW)', 'Wind Speed (m/s)', 'Theoretical_Power_Curve (KWh)', 'Wind Direction (°)']]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, shuffle=True)

In [None]:
Models = {
    "Decision Tree": DecisionTreeClassifier(random_state=0),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000)
}

for name, model in Models.items():
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)

  acc = accuracy_score(y_test, y_pred)
  cm = confusion_matrix(y_test, y_pred)
  f1 = f1_score(y_test, y_pred)

  print(f"Modelo: {name}")
  print(f"Acurácia: {acc: .2f}")
  print(f"Matriz de confusao: {cm}")
  print(f"F1 Score: {f1: .2f}")

Modelo: Decision Tree
Acurácia:  1.00
Matriz de confusao: [[5441    0]
 [   0 4665]]
F1 Score:  1.00
Modelo: KNN
Acurácia:  1.00
Matriz de confusao: [[5430   11]
 [   7 4658]]
F1 Score:  1.00
Modelo: Logistic Regression
Acurácia:  1.00
Matriz de confusao: [[5441    0]
 [   0 4665]]
F1 Score:  1.00


In [None]:
## O resultado saiu perfeito, não sei se está correto, mas os melhores modelos dentre os 3 são : Decision Tree e Logistic Regression


In [None]:
## CP002_002_SERS.pdf
## Exercicio 1

In [None]:
median_radiation = df['Radiation'].median()
df['Radiation_Class'] = df['Radiation'].apply(lambda x: 1 if x > median_radiation else 0)

In [None]:
y = df['Radiation_Class']

In [None]:
X = df[['Temperature', 'Pressure', 'Humidity', 'WindDirection(Degrees)', 'Speed']]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [69]:
Models = {
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC()
}

for model_name, model in Models.items():
    print(f"Treinando {model_name}...")

    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Acurácia do {model_name}: {accuracy:.2f}")
    print(f"Matriz de Confusão do {model_name}:{cm}")
    print(f"Relatório de Classificação do {model_name}:{report}")

Treinando Decision Tree...
Acurácia do Decision Tree: 0.8298
Matriz de Confusão do Decision Tree:
[[4082  871]
 [ 798 4055]]
Relatório de Classificação do Decision Tree:
              precision    recall  f1-score   support

           0       0.84      0.82      0.83      4953
           1       0.82      0.84      0.83      4853

    accuracy                           0.83      9806
   macro avg       0.83      0.83      0.83      9806
weighted avg       0.83      0.83      0.83      9806

--------------------------------------------------
Treinando Random Forest...
Acurácia do Random Forest: 0.8773
Matriz de Confusão do Random Forest:
[[4575  378]
 [ 825 4028]]
Relatório de Classificação do Random Forest:
              precision    recall  f1-score   support

           0       0.85      0.92      0.88      4953
           1       0.91      0.83      0.87      4853

    accuracy                           0.88      9806
   macro avg       0.88      0.88      0.88      9806
weighted a

In [None]:
## CP002_002_SERS.pdf
## Exercicio 2

In [None]:
y = df2['LV ActivePower (kW)']

In [None]:
X = df2[['Wind Speed (m/s)', 'Wind Direction (°)', 'Theoretical_Power_Curve (KWh)']]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [72]:
Models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor()
}

for model_name, model in Models.items():
    print(f"Treinando {model_name}...")

    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print(f"RMSE do {model_name}: {rmse:.2f}")
    print(f"R² do {model_name}: {r2:.2f}")

Treinando Linear Regression...
RMSE do Linear Regression: 411.7100
R² do Linear Regression: 0.9007
--------------------------------------------------
Treinando Decision Tree Regressor...
RMSE do Decision Tree Regressor: 542.5470
R² do Decision Tree Regressor: 0.8275
--------------------------------------------------
Treinando Random Forest Regressor...
RMSE do Random Forest Regressor: 412.8437
R² do Random Forest Regressor: 0.9001
--------------------------------------------------
