In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv('PowerClimateJaipur.csv')

In [3]:
data.head()

Unnamed: 0,YEAR,MO,DY,T2M,T2MDEW,T2M_MIN,T2M_MAX,QV2M,PRECTOTCORR,PS,WS50M_MAX,WS50M_MIN,WS10M
0,1985,1,1,12.36,-2.92,5.68,21.5,3.23,0.0,96.89,7.97,3.25,3.35
1,1985,1,2,11.18,-7.24,4.64,20.35,2.26,0.0,97.19,8.47,0.76,3.15
2,1985,1,3,11.23,-8.38,5.12,19.85,2.01,0.0,97.19,7.74,0.46,2.54
3,1985,1,4,12.77,-6.98,5.69,20.59,2.26,0.0,96.91,6.34,1.61,2.8
4,1985,1,5,13.75,-1.0,7.94,22.08,3.72,0.01,96.81,8.22,1.06,2.95


In [4]:
data.tail()

Unnamed: 0,YEAR,MO,DY,T2M,T2MDEW,T2M_MIN,T2M_MAX,QV2M,PRECTOTCORR,PS,WS50M_MAX,WS50M_MIN,WS10M
13874,2022,12,27,12.78,-3.86,5.11,23.8,2.99,0.0,97.1,6.93,1.58,2.86
13875,2022,12,28,15.48,-2.29,7.48,25.02,3.36,0.0,97.1,5.16,0.88,2.32
13876,2022,12,29,16.73,1.49,8.91,26.8,4.39,0.0,97.06,5.73,2.16,2.77
13877,2022,12,30,17.36,6.12,12.35,25.29,6.16,0.04,97.13,9.35,3.69,4.26
13878,2022,12,31,13.58,2.93,8.13,21.94,4.94,0.0,97.27,8.14,3.96,3.52


In [5]:
print("Number of missing values in each column:\n", data.isna().sum())

Number of missing values in each column:
 YEAR           0
MO             0
DY             0
T2M            0
T2MDEW         0
T2M_MIN        0
T2M_MAX        0
QV2M           0
PRECTOTCORR    0
PS             0
WS50M_MAX      0
WS50M_MIN      0
WS10M          0
dtype: int64


In [13]:
selected_features = ['T2M', 'T2MDEW', 'T2M_MIN', 'T2M_MAX', 'QV2M', 'PRECTOTCORR', 'PS', 'WS50M_MAX', 'WS50M_MIN', 'WS10M']

In [15]:

scaler = MinMaxScaler()
data[selected_features] = scaler.fit_transform(data[selected_features])

In [16]:
X = data[selected_features]
y = data[selected_features]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [17]:
model = LinearRegression()

In [18]:
model.fit(X_train, y_train)

In [19]:
y_pred = model.predict(X_test)

In [20]:
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 1.3428005389777242e-32


In [21]:
test_data = pd.concat([X_test, y_test], axis=1)

In [22]:
test_data.to_csv('test_data.csv', index=False)

In [23]:
test_data = pd.read_csv('test_data.csv')

In [24]:
test_data.head()

Unnamed: 0,T2M,T2MDEW,T2M_MIN,T2M_MAX,QV2M,PRECTOTCORR,PS,WS50M_MAX,WS50M_MIN,WS10M,T2M.1,T2MDEW.1,T2M_MIN.1,T2M_MAX.1,QV2M.1,PRECTOTCORR.1,PS.1,WS50M_MAX.1,WS50M_MIN.1,WS10M.1
0,0.997246,0.99169,0.9966,0.995602,0.990984,0.91592,0.997866,0.989636,0.992023,0.992163,0.997246,0.99169,0.9966,0.995602,0.990984,0.91592,0.997866,0.989636,0.992023,0.992163
1,0.988099,0.997113,0.990081,0.985544,0.996358,0.916452,0.998112,0.991222,0.994242,0.993283,0.988099,0.997113,0.990081,0.985544,0.996358,0.916452,0.998112,0.991222,0.994242,0.993283
2,0.984161,0.973586,0.983175,0.986242,0.981596,0.915884,0.999371,0.99137,0.992835,0.993203,0.984161,0.973586,0.983175,0.986242,0.981596,0.915884,0.999371,0.99137,0.992835,0.993203
3,0.994232,0.99367,0.996358,0.99128,0.992717,0.917497,0.998304,0.991646,0.993301,0.993669,0.994232,0.99367,0.996358,0.99128,0.992717,0.917497,0.998304,0.991646,0.993301,0.993669
4,0.998431,0.984589,0.99785,0.998757,0.98608,0.915884,0.998112,0.996601,0.997384,0.996473,0.998431,0.984589,0.99785,0.998757,0.98608,0.915884,0.998112,0.996601,0.997384,0.996473


In [25]:
test_data[selected_features] = scaler.transform(test_data[selected_features])
X_test_new = test_data[selected_features]

In [26]:
y_pred_new = model.predict(X_test_new)

In [27]:
print("Predicted weather labels for the test data:")
print(y_pred_new)

Predicted weather labels for the test data:
[[0.96285975 0.9753923  0.96860414 ... 0.98516293 0.99097416 0.99073866]
 [0.96285094 0.97539759 0.96859783 ... 0.9851645  0.99097636 0.99073977]
 [0.96284715 0.97537464 0.96859114 ... 0.98516464 0.99097496 0.99073969]
 ...
 [0.96285883 0.97537939 0.96860168 ... 0.98516276 0.9909729  0.99073859]
 [0.96284081 0.97536883 0.96858467 ... 0.98516094 0.99097219 0.99073753]
 [0.96286089 0.97538056 0.96860346 ... 0.98516505 0.99097571 0.99074045]]


In [28]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [29]:
y_pred = model.predict(X_test)

In [30]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [31]:
print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('R-squared Score:', r2)

Mean Absolute Error: 6.592549113083962e-17
Mean Squared Error: 1.3428005389777242e-32
R-squared Score: 1.0


In [34]:
from sklearn.metrics import accuracy_score

In [35]:
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy:', accuracy)

ValueError: continuous-multioutput is not supported

In [27]:
from sklearn.metrics import accuracy_score

In [28]:
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy:', accuracy)

ValueError: continuous-multioutput is not supported