In [1]:
import tensorflow as tf
# Ask TensorFlow which physical GPU devices it can see and report the count.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


Num GPUs Available:  1


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import xgboost as xgb

# Read the measurements from the CSV file (path is relative to the working directory)
df = pd.read_csv('SolarPrediction.csv')
print(df.head())

# --- Exploratory Data Analysis (EDA) ---
# Histogram of the UNIXTime column with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['UNIXTime'], kde=True, ax=ax)
ax.set_title('Distribution of UNIXTime')
plt.show()

# Discard the date/time columns that the rest of the cell does not use
unused_columns = ['Data', 'Time', 'TimeSunRise', 'TimeSunSet']
df = df.drop(columns=unused_columns)
print(df.head())

# Annotated correlation matrix; the diverging palette is centred on zero
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

# Grid of pairwise scatter plots over every remaining column
sns.pairplot(df)
plt.show()

# Target is the 'Radiation' column; every other remaining column is a feature
y = df['Radiation']
X = df.drop(columns='Radiation')

# Hold out 30% of the rows for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=101,
)

# Wrap both partitions in xgboost's DMatrix container
dtest = xgb.DMatrix(X_test, label=y_test)
dtrain = xgb.DMatrix(X_train, label=y_train)

# Booster configuration: squared-error regression, histogram tree method, CUDA device
params = {
    'objective': 'reg:squarederror',
    'tree_method': 'hist',
    'device': 'cuda',
    'random_state': 101
}

# Fit the booster for 50 boosting rounds
xg_reg = xgb.train(params, dtrain, num_boost_round=50)

# Score the held-out partition
y_pred = xg_reg.predict(dtest)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is derived from the MSE computed just above
r2 = r2_score(y_test, y_pred)

print(f"R2 Score: {r2}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")

# Cross-validation
# 10-fold CV over the full feature matrix with the same booster parameters.
# NOTE: xgb.cv returns one row per boosting round (not one row per fold); each row
# holds the mean and standard deviation of the metric across the folds after that round.
cv_results = xgb.cv(
    params,
    xgb.DMatrix(X, label=y),
    nfold=10,
    num_boost_round=50,
    metrics='rmse',
    as_pandas=True,
    seed=101,
)

# The last row describes the model after all boosting rounds, which is the figure
# comparable to the hold-out RMSE. Averaging the columns over every round would mix
# in the under-trained early rounds and overstate the error.
final_round = cv_results.iloc[-1]

print(f"Cross-Validation RMSE by boosting round: {cv_results['test-rmse-mean']}")
print(f"Mean CV RMSE Score: {final_round['test-rmse-mean']}")
print(f"Standard Deviation of CV RMSE Scores: {final_round['test-rmse-std']}")

# Scatter of the held-out targets against the model's predictions
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.5)
# Dashed red identity line spanning the range of the full target column
y_low, y_high = y.min(), y.max()
ax.plot([y_low, y_high], [y_low, y_high], 'r--')
ax.set_xlabel('True Values')
ax.set_ylabel('Predicted Values')
ax.set_title('True vs Predicted Radiation Values')
plt.show()

# Histogram (with KDE) of the prediction errors on the held-out partition
residuals = y_test - y_pred
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(residuals, kde=True, ax=ax)
ax.set_title('Distribution of Residuals')
ax.set_xlabel('Residual')
plt.show()
