In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt

# Load the dataset into a pandas dataframe
df = pd.read_csv('new_data_full.csv')
df = df.loc[:, ~df.columns.str.contains('9')]
df = df.drop(columns=['Division','Year']).dropna()
df['Soil Type'] = df['Soil Type'].astype('category')
df['Soil Type'] = df['Soil Type'].cat.codes

# Split the dataset into X and y variables
X = df.iloc[:,:-1]
y = df['Yield']

# Split the dataset into two groups
group_standardization = X.copy()
group_normalization = X.copy()

# Apply standardization to the first group
scaler = StandardScaler()
group_standardization = scaler.fit_transform(group_standardization)

# Apply normalization to the second group
scaler = MinMaxScaler()
group_normalization = scaler.fit_transform(group_normalization)

# Run regression analysis on both groups
regression_standardization = LinearRegression().fit(group_standardization, y)
regression_normalization = LinearRegression().fit(group_normalization, y)

# Compare the accuracy of the predictions
standardization_predictions = regression_standardization.predict(group_standardization)
normalization_predictions = regression_normalization.predict(group_normalization)

# Compare the accuracy of the predictions using some metric, such as mean squared error
mse_standardization = mean_squared_error(y, standardization_predictions,squared=False)
mse_normalization = mean_squared_error(y, normalization_predictions,squared=False)

print("MSE using standardization: ", mse_standardization)
print("MSE using normalization: ", mse_normalization)

# Compare the accuracy of the predictions using R-squared scores
r2_standardization = r2_score(y, standardization_predictions)
r2_normalization = r2_score(y, normalization_predictions)

print("R-squared score using standardization: ", r2_standardization)
print("R-squared score using normalization: ", r2_normalization)

MSE using standardization:  1.820215154587943e-12
MSE using normalization:  1.198082050027141e-12
R-squared score using standardization:  1.0
R-squared score using normalization:  1.0
