<a href="https://colab.research.google.com/github/nahidhasan0004/ML/blob/main/Daily_Forcast_Using_Random_Forest(MIN_TEMP).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import pandas as pd

# Load the uploaded Excel file.
# NOTE(review): "/content/..." looks like the Colab upload directory — the file
# must be uploaded there before this cell runs; confirm for other environments.
file_path = "/content/mintemp dhaka.xlsx"
# Open the workbook once; the resulting ExcelFile object is reused by the
# next cell to parse a sheet.
excel_data = pd.ExcelFile(file_path)

# Display sheet names to understand structure
excel_data.sheet_names


['Sheet1']

In [16]:
# Read the 'Sheet1' worksheet out of the workbook opened in the previous cell.
df = pd.read_excel(excel_data, sheet_name='Sheet1')

# Preview the top rows to check the layout.
df.head()


Unnamed: 0,Station_name,St_ID,Year,Month,D_01,D_02,D_03,D_04,D_05,D_06,...,D_25,D_26,D_27,D_28,D_29,D_30,D_31,Lowest,Highest,Average
0,Dhaka,41923,1981,1,10.7,12.7,12.5,10.6,12.1,17.2,...,15.3,13.4,13.3,13.8,12.7,12.9,18.6,10.6,,
1,Dhaka,41923,1981,2,17.0,16.0,14.0,13.0,14.0,12.0,...,,20.3,20.0,20.0,,,,10.0,,
2,Dhaka,41923,1981,3,22.0,23.0,21.0,16.0,15.0,17.0,...,19.2,16.7,20.0,16.0,18.9,19.2,21.7,15.0,,
3,Dhaka,41923,1981,4,18.4,20.6,23.3,25.0,20.0,25.8,...,23.6,25.0,25.6,22.8,25.3,23.9,,17.2,,
4,Dhaka,41923,1981,5,27.2,21.8,23.0,23.9,25.1,22.8,...,21.7,24.0,24.9,24.7,21.2,21.4,23.9,14.7,,


In [20]:
# Station identifiers and the per-month summary columns are removed here;
# the Year, Month and D_01..D_31 columns are kept.
columns_to_drop = ['Station_name', 'St_ID', 'Lowest', 'Highest', 'Average']
df_filtered = df.drop(labels=columns_to_drop, axis='columns')

# Preview the reduced table.
df_filtered.head()


Unnamed: 0,Year,Month,D_01,D_02,D_03,D_04,D_05,D_06,D_07,D_08,...,D_22,D_23,D_24,D_25,D_26,D_27,D_28,D_29,D_30,D_31
0,1981,1,10.7,12.7,12.5,10.6,12.1,17.2,17.1,13.1,...,12.6,12.9,15.0,15.3,13.4,13.3,13.8,12.7,12.9,18.6
1,1981,2,17.0,16.0,14.0,13.0,14.0,12.0,14.0,18.0,...,15.0,14.2,14.2,,20.3,20.0,20.0,,,
2,1981,3,22.0,23.0,21.0,16.0,15.0,17.0,22.0,22.0,...,22.9,23.9,19.2,19.2,16.7,20.0,16.0,18.9,19.2,21.7
3,1981,4,18.4,20.6,23.3,25.0,20.0,25.8,23.4,22.0,...,21.7,21.6,23.1,23.6,25.0,25.6,22.8,25.3,23.9,
4,1981,5,27.2,21.8,23.0,23.9,25.1,22.8,24.4,14.7,...,24.4,26.7,26.9,21.7,24.0,24.9,24.7,21.2,21.4,23.9


In [27]:
# Reshape the wide table in df_filtered (one row per Year/Month, one column
# per day D_01..D_31) into a long daily series: one (Date, MinTemp) row per
# day that actually has a reading.
time_series = []

for _, row in df_filtered.iterrows():
    year, month = int(row['Year']), int(row['Month'])
    for day in range(1, 32):
        col = f'D_{day:02d}'
        # Skip day columns that are absent or hold no reading.
        if col not in row or pd.isna(row[col]):
            continue
        try:
            date = pd.Timestamp(year=year, month=month, day=day)
        except ValueError:
            # The day does not exist in this month (e.g. 30 February).
            # Only ValueError is caught so that real problems and
            # KeyboardInterrupt are no longer silently swallowed.
            continue
        time_series.append((date, row[col]))

# Index by date and sort chronologically; later cells rely on this order
# when they shift the series to build lag features.
daily_df = (
    pd.DataFrame(time_series, columns=['Date', 'MinTemp'])
    .set_index('Date')
    .sort_index()
)

In [28]:
# Lag features: the minimum temperature of each of the previous 7 rows.
# shift() is positional, so lag_k is the value k rows earlier in daily_df.
min_temp_series = daily_df['MinTemp']
lagged = {f'lag_{k}': min_temp_series.shift(k) for k in range(1, 8)}
for lag_name, lag_values in lagged.items():
    daily_df[lag_name] = lag_values

# Rows without a complete set of lags contain NaN and are removed in place.
daily_df.dropna(inplace=True)


In [29]:
from sklearn.model_selection import train_test_split

# Features are the seven lag columns (lag_1 first); the target is the
# same-day minimum temperature.
lag_columns = [f'lag_{i}' for i in range(1, 8)]
X = daily_df[lag_columns]
y = daily_df['MinTemp']

# Chronological split: shuffle=False keeps row order, so the final 365*5 rows
# (roughly the last 5 years) become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=365*5,
    shuffle=False,
)


In [30]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Fit a 100-tree random forest with a fixed seed so results are reproducible.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows with mean squared error.
y_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
print("MSE:", test_mse)


MSE: 2.321023048767124


In [31]:
from datetime import timedelta
import numpy as np

# Recursive multi-step forecast: predict one day, append the prediction to the
# history, and use it as an input for the following day.
future_days = 365 * 8
# Column names and order the model was fitted with: lag_1 (most recent) .. lag_7.
feature_names = [f'lag_{i}' for i in range(1, 8)]
last_known = daily_df.iloc[-7:]['MinTemp'].values.tolist()

future_preds = []
for _ in range(future_days):
    # last_known is chronological (oldest first), but the model expects
    # lag_1 = yesterday first, so the 7-value window is reversed.
    window = last_known[-7:][::-1]
    # A DataFrame with the training column names is passed instead of a bare
    # array; a bare array would likely make scikit-learn warn about missing
    # feature names on every single prediction.
    x_input = pd.DataFrame(np.array(window).reshape(1, -1), columns=feature_names)
    pred = model.predict(x_input)[0]
    future_preds.append(pred)
    last_known.append(pred)

# Forecast dates start the day after the last observation.
future_dates = pd.date_range(start=daily_df.index[-1] + timedelta(days=1), periods=future_days)
future_df = pd.DataFrame({'Date': future_dates, 'Forecasted_MinTemp': future_preds})


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [32]:
# future_df carries the dates in a regular 'Date' column and only a default
# integer index, so the index is left out to avoid a meaningless unnamed
# first column in the CSV.
future_df.to_csv('8_years_forecast_min_temp.csv', index=False)


In [11]:
# STEP 1: forecast 5 years starting from 2020-01-01
from datetime import timedelta
import numpy as np

start_date = pd.Timestamp("2020-01-01")
# Position of the first row dated on or after start_date. daily_df is sorted
# by date, so searchsorted also works when that exact day has no reading
# (get_loc would raise KeyError in that case).
start_idx = daily_df.index.searchsorted(start_date)
if start_idx < 7:
    raise ValueError(
        f"Need at least 7 observations before {start_date.date()} to seed the forecast"
    )

# Seed the forecast with the 7 observations immediately before the start date.
# The column is 'MinTemp': daily_df has no 'MaxTemp' column.
last_known = daily_df.iloc[start_idx - 7:start_idx]['MinTemp'].values.tolist()

future_days = 365 * 5  # 5 years
# Column names and order the model was fitted with: lag_1 (most recent) .. lag_7.
feature_names = [f'lag_{i}' for i in range(1, 8)]

future_preds = []
for _ in range(future_days):
    # last_known is chronological (oldest first), but the model expects
    # lag_1 = yesterday first, so the 7-value window is reversed.
    window = last_known[-7:][::-1]
    # Named columns match the training frame; a bare array would likely make
    # scikit-learn warn about missing feature names on every prediction.
    x_input = pd.DataFrame(np.array(window).reshape(1, -1), columns=feature_names)
    pred = model.predict(x_input)[0]
    future_preds.append(pred)
    last_known.append(pred)

# STEP 2: build the list of forecast dates
future_dates = pd.date_range(start=start_date, periods=future_days)
future_df = pd.DataFrame(
    {'Date': future_dates, 'Forecasted_MinTemp': future_preds}
).set_index('Date')




In [12]:
# Save the forecast to a CSV file. The preceding cell sets 'Date' as the index
# of future_df, so the dates are written as the first column.
future_df.to_csv('forecast_2020_2024.csv')