In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv('weatherDataJaipur.csv')

In [3]:
data.head()

Unnamed: 0,date,meantempm,meandewptm,meanpressurem,maxhumidity,minhumidity,maxtempm,mintempm,maxdewptm,mindewptm,maxpressurem,minpressurem,precipm
0,01-05-2020,34,-1,1005.63,24,4,43,26,9,-10,1009,999,0.0
1,02-05-2020,36,4,1005.46,21,6,43,29,10,-2,1008,1001,0.0
2,03-05-2020,35,6,1006.0,27,5,41,29,12,-2,1009,1000,0.0
3,04-05-2020,34,7,1005.65,29,6,41,27,13,0,1008,1001,0.0
4,05-05-2020,31,11,1007.94,61,13,38,24,16,6,1011,1003,5.0


In [4]:
data.tail()

Unnamed: 0,date,meantempm,meandewptm,meanpressurem,maxhumidity,minhumidity,maxtempm,mintempm,maxdewptm,mindewptm,maxpressurem,minpressurem,precipm
674,07-03-2022,24,2,1014.07,55,5,32,15,8,-6,1017,1011,0.0
675,08-03-2022,24,1,1014.41,42,7,32,15,5,-5,1017,1011,0.0
676,09-03-2022,26,3,1014.16,37,8,33,19,6,-1,1017,1009,0.0
677,10-03-2022,26,4,1013.76,38,6,34,19,8,0,1017,1009,0.0
678,11-03-2022,26,3,1013.0,46,6,34,18,10,-3,1016,1009,0.0


In [5]:
print("Number of missing values in each column:\n", data.isna().sum())

Number of missing values in each column:
 date             0
meantempm        0
meandewptm       0
meanpressurem    0
maxhumidity      0
minhumidity      0
maxtempm         0
mintempm         0
maxdewptm        0
mindewptm        0
maxpressurem     0
minpressurem     0
precipm          0
dtype: int64


In [6]:
selected_features = ['meantempm', 'meandewptm', 'meanpressurem', 'maxhumidity', 'minhumidity', 'maxtempm', 'mintempm', 'maxdewptm', 'mindewptm', 'maxpressurem', 'minpressurem', 'precipm']

In [7]:
data = data.drop(['date'], axis=1)
scaler = MinMaxScaler()
data[selected_features] = scaler.fit_transform(data[selected_features])

In [8]:
X = data[selected_features]
y = data['precipm']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
model = LinearRegression()

In [10]:
model.fit(X_train, y_train)

In [11]:
y_pred = model.predict(X_test)

In [12]:
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 7.312376112091836e-33


In [13]:
test_data = pd.concat([X_test, y_test], axis=1)

In [14]:
test_data.to_csv('test_data.csv', index=False)

In [15]:
test_data = pd.read_csv('test_data.csv')

In [16]:
test_data.head()

Unnamed: 0,meantempm,meandewptm,meanpressurem,maxhumidity,minhumidity,maxtempm,mintempm,maxdewptm,mindewptm,maxpressurem,minpressurem,precipm,precipm.1
0,0.285714,0.194444,0.70155,0.4,0.035294,0.25,0.275862,0.172414,0.705882,0.703704,0.714286,0.0,0.0
1,0.785714,0.388889,0.365116,0.141176,0.0,0.75,0.758621,0.482759,0.773109,0.407407,0.392857,0.0,0.0
2,0.642857,0.972222,0.093411,1.0,0.658824,0.464286,0.724138,0.896552,0.991597,0.296296,0.142857,0.0,0.0
3,0.857143,0.472222,0.416279,0.164706,0.035294,0.821429,0.862069,0.448276,0.806723,0.444444,0.392857,0.0,0.0
4,0.571429,0.972222,0.116667,1.0,0.282353,0.392857,0.724138,0.896552,0.840336,0.148148,0.214286,0.45614,0.45614


In [17]:
test_data[selected_features] = scaler.transform(test_data[selected_features])
X_test_new = test_data[selected_features]

In [18]:
y_pred_new = model.predict(X_test_new)

In [19]:
print("Predicted weather labels for the test data:")
print(y_pred_new)

Predicted weather labels for the test data:
[8.51513090e-15 8.51667600e-15 8.51323887e-15 8.51720284e-15
 8.00246230e-03 8.51792758e-15 8.51386756e-15 8.51489373e-15
 8.51505350e-15 9.23361034e-04 8.51622875e-15 8.51742871e-15
 8.51434718e-15 8.51373868e-15 8.51418807e-15 8.51665651e-15
 8.51536834e-15 6.15574023e-04 8.51527265e-15 1.23114805e-04
 8.51485880e-15 8.51302216e-15 8.51794426e-15 8.51627030e-15
 8.51502208e-15 8.51656930e-15 8.51348590e-15 8.51356764e-15
 7.07910126e-03 8.51438589e-15 8.51637002e-15 8.51195299e-15
 8.51046021e-15 1.56971376e-02 8.51669936e-15 8.51579571e-15
 9.23361034e-04 2.77008310e-03 8.51544638e-15 1.23114805e-03
 8.51490729e-15 8.51487602e-15 6.15574023e-04 8.51521553e-15
 8.51474942e-15 8.51464259e-15 8.51367234e-15 8.51611405e-15
 8.51678539e-15 8.51386475e-15 8.51182694e-15 8.51945825e-15
 6.15574023e-04 8.51461477e-15 8.51358046e-15 8.51709665e-15
 8.51238284e-15 8.51355586e-15 8.51376885e-15 8.51730371e-15
 8.51536810e-15 8.51495211e-15 8.51539506

In [20]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [21]:
y_pred = model.predict(X_test)

In [22]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [23]:
print('Mean Absolute Error:', mae)
print('Mean Squared Error:', mse)
print('R-squared Score:', r2)

Mean Absolute Error: 5.90289394585625e-17
Mean Squared Error: 7.312376112091836e-33
R-squared Score: 1.0


In [24]:
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy:', accuracy)

NameError: name 'accuracy_score' is not defined

In [25]:
from sklearn.metrics import accuracy_score

In [26]:
accuracy = accuracy_score(y_test, y_pred)
print('Model accuracy:', accuracy)

ValueError: continuous is not supported