<a href="https://colab.research.google.com/github/prachi-pandey-github/chatbot/blob/main/temperature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install matplotlib



In [None]:
import matplotlib.pyplot as plt

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import os
#matplotlib initilization
%matplotlib inline

pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',150)
plt.rcParams['font.size'] = 14
plt.rcParams['figure.figsize'] = (10,6)
plt.rcParams['figure.facecolor'] = '#00000000'


In [None]:
# Read the Ranchi weather CSV from the Colab workspace into a DataFrame.
data = pd.read_csv(filepath_or_buffer='/content/Ranchi data final.csv')

In [None]:
# Display the loaded frame; the rendering is truncated to the first and last rows.
data

Unnamed: 0,wind_degree,pressure_mb,precip_mm,humidity,cloud,temperature_celsius
0,350,1010,0.0,62,50,30.0
1,340,1011,0.0,66,75,29.0
2,10,1010,0.0,100,75,25.0
3,298,1007,0.0,70,45,26.1
4,280,1009,0.0,94,75,26.0
...,...,...,...,...,...,...
212,320,1012,0.0,37,0,26.0
213,330,1013,0.0,34,25,26.0
214,10,1014,0.0,45,25,26.0
215,350,1013,0.0,33,75,30.0


In [None]:
# Lowest value in the temperature_celsius column.
min_value = data.temperature_celsius.min()
min_value


9.0

In [None]:
# Split the frame into model inputs (five weather readings) and the label
# to predict (temperature_celsius).
features = data[[
    'wind_degree',
    'pressure_mb',
    'precip_mm',
    'humidity',
    'cloud',
]]
target = data.temperature_celsius

In [None]:
# Print the column dtypes and non-null counts to check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 217 entries, 0 to 216
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   wind_degree          217 non-null    int64  
 1   pressure_mb          217 non-null    int64  
 2   precip_mm            217 non-null    float64
 3   humidity             217 non-null    int64  
 4   cloud                217 non-null    int64  
 5   temperature_celsius  217 non-null    float64
dtypes: float64(2), int64(4)
memory usage: 10.3 KB


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
data.describe()

Unnamed: 0,wind_degree,pressure_mb,precip_mm,humidity,cloud,temperature_celsius
count,217.0,217.0,217.0,217.0,217.0,217.0
mean,113.502304,1016.175115,0.088802,76.97235,30.331797,18.211521
std,133.684022,4.838764,0.623525,17.237426,33.805415,4.731015
min,7.0,1001.0,0.0,33.0,0.0,9.0
25%,10.0,1014.0,0.0,64.0,0.0,15.0
50%,10.0,1018.0,0.0,82.0,15.0,18.0
75%,260.0,1020.0,0.0,94.0,50.0,22.0
max,360.0,1023.0,8.5,100.0,100.0,30.0


In [None]:
!pip install plotly matplotlib seaborn --quiet

In [None]:
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
%matplotlib inline

In [None]:
# Seaborn dark-grid theme first, then matplotlib defaults for font size,
# figure size and a fully transparent figure background.
sns.set_style('darkgrid')
matplotlib.rcParams.update({
    'font.size': 14,
    'figure.figsize': (10, 6),
    'figure.facecolor': '#00000000',
})

In [None]:
# Summary statistics for the target column only.
data['temperature_celsius'].describe()

count    217.000000
mean      18.211521
std        4.731015
min        9.000000
25%       15.000000
50%       18.000000
75%       22.000000
max       30.000000
Name: temperature_celsius, dtype: float64

In [None]:
# Histogram of the target (47 bins) with a box-plot marginal and a small gap
# between bars.
fig = px.histogram(
    data,
    x='temperature_celsius',
    nbins=47,
    marginal='box',
    title='Distribution of temperature',
).update_layout(bargap=0.1)
fig.show()

In [None]:

# Histogram of humidity in red, with a box-plot marginal and a small gap
# between bars.
fig = px.histogram(
    data,
    x='humidity',
    color_discrete_sequence=['red'],
    marginal='box',
    title='Distribution of humidity',
).update_layout(bargap=0.1)
fig.show()

In [None]:

# Histogram of precipitation in red, with a box-plot marginal and a small gap
# between bars.
fig = px.histogram(
    data,
    x='precip_mm',
    color_discrete_sequence=['red'],
    marginal='box',
    title='Distribution of precip_mm',
).update_layout(bargap=0.1)
fig.show()

In [None]:
# Label each row 'Yes' when any precipitation was recorded (precip_mm > 0), else 'No'.
had_rain = data['precip_mm'].gt(0)
data['Precipitation'] = had_rain.map({True: 'Yes', False: 'No'})

In [None]:
# Scatter of cloud cover against temperature, colored by the Precipitation
# flag (green = 'Yes', red = 'No').
# The title names the column actually plotted on the y-axis ('cloud'); it
# previously read 'wind_degree' and contained stray whitespace.
fig = px.scatter(data,
                 x='temperature_celsius',
                 y='cloud',
                 color='Precipitation',
                 color_discrete_map={'Yes': 'green', 'No': 'red'},
                 opacity=0.8,
                 title='temperature vs cloud')
fig.update_traces(marker_size=5)
fig.show()

In [None]:
# Remove rows whose target column (temperature_celsius) is null: the model is
# trained to predict temperature, so rows without it cannot be used.
data = data.dropna(subset=['temperature_celsius'])

# `features` and `target` were selected from `data` in an earlier cell, before
# this filter ran, so re-select them here; otherwise the dropped rows would
# still be present in the data passed to the train/test split.
features = data[['wind_degree','pressure_mb','precip_mm', 'humidity','cloud']]
target = data['temperature_celsius']

In [None]:
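# Intended scheme: 60% of the data for training the model and 20% for
# validation (to evaluate different versions of the model as we try out
# different parameters to train it).
#
# The test set is used to report the final accuracy.
# NOTE: the split cell below currently makes only an 80/20 train/test split
# (test_size=0.2); no separate validation set is created.
# Training the model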



In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Fit a random forest of 100 trees on the training split; the seed is fixed
# so repeated runs build the same forest.
model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
)
model.fit(X=X_train, y=y_train)


In [None]:
# Predict temperatures for the held-out test rows.
y_pred = model.predict(X=X_test)


In [None]:
# Score the test-set predictions: mean absolute error, mean squared error and R².
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)


In [None]:
# Report the three evaluation metrics, one per line.
print(f'MAE: {mae}', f'MSE: {mse}', f'R2: {r2}', sep='\n')


MAE: 1.995811147186147
MSE: 8.203792846101317
R2: 0.6391934276917381
