In [None]:
%pip install pandas

In [None]:
import os 
import torch 
from torch import nn 
from torchvision import transforms
import pandas as pd 
import numpy as np 


In [None]:
# Read the raw table from disk and echo it to the cell output.
df = pd.read_csv("Final_Dataset.csv")
print(df)

# Discard the two columns that are not used in the rest of the notebook.
df = df.drop(columns=['Time', 'Rainfall'])

In [None]:
# (rows, columns) of the frame after 'Time' and 'Rainfall' were dropped.
df.shape

In [None]:
# Summary statistics of the columns as loaded, i.e. before Normalize() rescales them.
df.describe()

In [None]:
# Column dtypes; a quick check that the columns handed to the scaler are numeric.
df.dtypes

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns of `df` that Normalize() rescales with MinMaxScaler.
# The strings are used as DataFrame column labels, so they must match the CSV
# headers character for character.
# NOTE(review): 'Degraded/Scrub Fores' and 'Decidous Forest' look misspelled;
# presumably they mirror the headers in Final_Dataset.csv — confirm before renaming.
# NOTE(review): the target 'GDP' is part of this list, so it is rescaled along with
# the features and every error metric computed later is in scaled units.
columns_to_normalize = ['WasteLand', 'Evergreen Forest', 'Degraded/Scrub Fores', 'Plantation', 
                         'Kharif Crop', 'Zaid Crop', 'Decidous Forest', 'Waterbodies max', 
                         'Waterbodies min', 'Current Fallow', 'Double/Triple Crop', 
                         'Rabi Crop', 'Built-up', 'Grassland', 'Littoral Swamp','GDP']

def Normalize(df, columns_to_normalize):
    """Min-max scale the selected columns of ``df`` in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the columns to rescale. It is modified in place.
    columns_to_normalize : list of str
        Labels of the columns that are passed through ``MinMaxScaler``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after the selected columns were overwritten
        with their scaled values. The frame is also printed as a side effect.
    """
    # Fit a fresh scaler on the raw values and transform them in one step.
    scaled_values = MinMaxScaler().fit_transform(df[columns_to_normalize].values)

    # Write the scaled values back over the original columns.
    df[columns_to_normalize] = scaled_values

    # Echo the updated frame, then hand it back to the caller.
    print(df)
    return df

# Rescale the listed columns. Normalize() writes into `df` itself, so the return
# value is not captured; as the cell's last expression it is still rendered as output.
Normalize(df, columns_to_normalize)


In [None]:
# Features: every column except the target. Selecting by label instead of by
# position (`iloc[:, :16]`) guarantees that 'GDP' can never end up among the
# features, whatever the column count or order of the CSV is.
X = df.drop(columns=['GDP'])
# Target, kept as a single-column DataFrame so it stays 2-D (n_samples, 1).
Y = df.loc[:, ['GDP']]

In [None]:
# Inspect the feature frame. NOTE(review): check that the target column 'GDP'
# is not listed among the columns printed here.
print(X)

In [None]:
from sklearn.model_selection import KFold
from pytorch_tabnet.tab_model import TabNetRegressor
import numpy as np
from sklearn.model_selection import train_test_split

def TabNet(X,Y):
    """Cross-validate a TabNet regressor and report its mean validation RMSE.

    A 12% hold-out set is split off first. 4-fold cross-validation is then run
    on the remaining rows only, so the hold-out rows are never used for
    training or early stopping. Each fold's model also predicts the hold-out
    set; the fold predictions are averaged and scored as a final sanity check.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix (DataFrame or ndarray).
    Y : array-like of shape (n_samples, 1)
        Target values. Passed to ``TabNetRegressor.fit`` as a 2-D array.

    Returns
    -------
    str
        The message ``"The CV score is <mean RMSE>"``, which is also printed.
    """
    # Convert once up front so positional indexing with the fold indices works.
    # The split is seeded so repeated runs evaluate on the same hold-out rows.
    X_dev, X_test, y_dev, y_test = train_test_split(
        np.asarray(X), np.asarray(Y), test_size=0.12, random_state=42
    )

    kf = KFold(n_splits=4, random_state=42, shuffle=True)
    predictions_array = []
    CV_score_array = []

    # Fold only the development rows; folding the full X would train on the hold-out.
    for train_index, valid_index in kf.split(X_dev):
        X_train, X_valid = X_dev[train_index], X_dev[valid_index]
        y_train, y_valid = y_dev[train_index], y_dev[valid_index]

        regressor = TabNetRegressor(verbose=0, seed=42, optimizer_fn=torch.optim.Adam)
        regressor.fit(X_train=X_train, y_train=y_train,
                      eval_set=[(X_valid, y_valid)],
                      patience=300, max_epochs=2000,
                      eval_metric=['rmse'],
                      )

        # best_cost is the best validation RMSE seen before early stopping.
        CV_score_array.append(regressor.best_cost)
        # The target is not log1p-transformed anywhere in this notebook, so the
        # predictions are used as they are (no expm1 back-transform).
        predictions_array.append(regressor.predict(X_test))

    # Average the fold models' hold-out predictions and score them.
    predictions = np.mean(predictions_array, axis=0)
    holdout_error = predictions - np.reshape(y_test, predictions.shape)
    holdout_rmse = float(np.sqrt(np.mean(holdout_error ** 2)))

    summary = "The CV score is %.5f" % np.mean(CV_score_array,axis=0)
    print(summary)
    print("The hold-out RMSE is %.5f" % holdout_rmse)
    return summary


In [None]:
# Run the TabNet cross-validation. The function prints its summary and the
# returned message string is echoed again as the cell output.
TabNet(X,Y)

In [None]:
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor 
from sklearn.svm import SVR 
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_squared_log_error
from sklearn.model_selection import RepeatedKFold, KFold, cross_val_score, train_test_split, GridSearchCV, RandomizedSearchCV
# Shared 4-fold splitter used by rmse(). The shuffle is seeded so that every model
# is scored on the same folds and the scores are reproducible between runs.
kf = KFold(n_splits=4, shuffle=True, random_state=42)
# Accumulators filled by apply_learning_algorithm(): mean and standard deviation
# of the per-fold RMSE, one entry per evaluated model.
cv_scores, cv_std = [], []

In [None]:
def rmse(model):
    """Return the per-fold RMSE of ``model`` under the notebook's K-fold split.

    Uses the notebook-level ``X``, ``Y`` and ``kf``.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn compatible regressor. ``cross_val_score`` fits
        clones of it, so the object passed in is left unfitted.

    Returns
    -------
    numpy.ndarray
        One RMSE value per fold.
    """
    # Y is a single-column frame; flatten it to the 1-D target scikit-learn
    # regressors expect, which avoids a DataConversionWarning on every fit.
    neg_mse = cross_val_score(model, X, np.ravel(Y), scoring="neg_mean_squared_error", cv=kf)
    # The scorer returns negated MSE (higher is better), hence the sign flip.
    return np.sqrt(-neg_mse)


In [None]:
def apply_learning_algorithm(model):
    """Cross-validate ``model`` and record its RMSE statistics.

    Appends the mean of the per-fold RMSE values to the notebook-level list
    ``cv_scores`` and their standard deviation to ``cv_std``. Returns nothing.
    """
    fold_rmse = rmse(model)
    mean_rmse, std_rmse = fold_rmse.mean(), fold_rmse.std()
    cv_scores.append(mean_rmse)
    cv_std.append(std_rmse)

In [None]:
# Candidate regressors compared by cross-validation, in this order:
#   models[0] LightGBM, models[1] RBF-kernel SVR, models[2] XGBoost.
# The hyper-parameter values are hard-coded; SOURCE does not show how they were chosen.
models = [LGBMRegressor(objective='regression',
                        num_leaves=166,
                        learning_rate=0.05, 
                        n_estimators=120,
                        max_bin = 55, 
                        bagging_fraction = 0.8,
                        bagging_freq = 5, 
                        feature_fraction = 0.2319,
                        feature_fraction_seed=9, 
                        bagging_seed=9,
                        min_data_in_leaf =6, 
                        min_sum_hessian_in_leaf = 11),
          SVR(kernel='rbf', C=10000, epsilon=0.05),
          XGBRegressor(max_depth=7,learning_rate=0.05,
                        n_estimators=700,
                        min_child_weight=0.5, 
                        colsample_bytree=0.8, 
                        subsample=0.8, 
                        # NOTE(review): XGBoost documents `eta` as an alias of
                        # `learning_rate`, so 0.5 here conflicts with the 0.05
                        # above — confirm which value is intended and drop the other.
                        eta=0.5,
                        seed=42)]

In [None]:
# Display labels for the entries of `models`, in the same order.
# NOTE(review): not referenced by any of the cells visible here.
model_names = ['LGBMRegressor','SupportVectorRegressor','XGBRegressor']

In [None]:
from sklearn.model_selection import cross_val_score
# Start from empty accumulators so that re-running this cell does not append
# a second set of results to the lists.
cv_scores = []
cv_std = []
# Score every candidate; each call appends one mean and one std entry.
for model in models:
    apply_learning_algorithm(model)

In [None]:
# Mean cross-validated RMSE per model, in the order of `models`.
cv_scores


In [None]:
# Standard deviation of the per-fold RMSE per model, in the order of `models`.
cv_std

In [None]:
from sklearn.linear_model import LinearRegression
# Linear-regression baseline trained on every available row.
model = LinearRegression().fit(X, Y)
# Predictions for the same rows the model was trained on (in-sample).
pred = model.predict(X)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error 

# In-sample error of the linear baseline: `pred` was produced from the training rows.
print('mean_squared_error : ', mean_squared_error(Y, pred))
print('mean_absolute_error : ', mean_absolute_error(Y, pred))

In [None]:
# Print the feature frame again before the MLP experiment.
# NOTE(review): check that the target column 'GDP' is not among the printed columns.
print(X)

In [None]:
from sklearn.neural_network import MLPRegressor
# Hold out a small test split; seeded so the same rows are held out on every run.
# NOTE(review): test_size=0.01 leaves very few test rows — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.01, random_state=1)
# Train on the training split only (fitting on the full X, Y would let the model
# see the test rows). The single-column target is flattened to 1-D for the fit.
clf = MLPRegressor(random_state=1, max_iter=300).fit(X_train, np.ravel(y_train))

In [None]:
# MLP predictions for the test split. Despite the name, these are predicted
# target values, not scores.
y_score = clf.predict(X_test)

In [None]:
# R^2 of the MLP on the training split, i.e. an in-sample figure.
# NOTE(review): a generalization estimate would need X_test / y_test instead.
clf.score(X_train, y_train)

In [None]:
# Fit the first entry of `models` (the LightGBM regressor) on all rows, using
# every column except 'GDP' as features and 'GDP' as the target.
models[0].fit(df.drop('GDP', axis=1), df['GDP'])

mean_squared_error :  2.9681331771499286e-31
mean_absolute_error :  3.766828119263924e-16


In [None]:
# Fit every estimator in `models` on the full data set before predicting.
# cross_val_score only trains clones, so the objects stored in `models` stay
# unfitted unless they are fitted here. Previously only models[0] and models[2]
# were fitted, and the SVR (models[1]) failed in predict() below.
feature_frame = df.drop('GDP', axis=1)
for estimator in models:
    estimator.fit(feature_frame, df['GDP'])

# Number of synthetic data points to generate
num_samples = 10

# Generating synthetic data points: one row per sample, one prediction per model
synthetic_data_points = []

for _ in range(num_samples):
    # Uniform draws in [0, 1), one per feature column (target excluded).
    # NOTE(review): this matches the min-max scaled range only if every feature
    # column was normalized — confirm.
    features = np.random.rand(feature_frame.shape[1])
    predictions = [model.predict([features])[0] for model in models]  # Use each model to predict
    synthetic_data_points.append(predictions)

# Convert synthetic data points to a DataFrame; column i holds the predictions of models[i]
synthetic_data = pd.DataFrame(synthetic_data_points, columns=[f'Synthetic_{i}' for i in range(len(models))])

# Convert s