<a href="https://colab.research.google.com/github/nahidhasan0004/ML/blob/main/Daily_Forcast_Using_Random_Forest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd

# Open the uploaded workbook once; the resulting ExcelFile handle is reused
# by the next cell to parse a sheet without re-opening the file.
file_path = "/content/maxtemp dhaka.xlsx"
excel_data = pd.ExcelFile(file_path)

# Show the available worksheet names so the right one can be parsed next.
excel_data.sheet_names


['Sheet1']

In [3]:
# Parse the 'Sheet1' worksheet of the already-opened workbook into a DataFrame.
df = excel_data.parse(sheet_name='Sheet1')

# Preview the top rows to check the column layout.
df.head(n=5)


Unnamed: 0,station_name,station_ID,Year,Month,D_01,D_02,D_03,D_04,D_05,D_06,...,D_25,D_26,D_27,D_28,D_29,D_30,D_31,Lowest,Highest,Average
0,Dhaka,41923,1981,1,23.9,24.4,24.6,25.6,27.2,26.6,...,24.9,26.7,26.8,25.6,24.6,27.8,24.4,20.6,27.8,25.119355
1,Dhaka,41923,1981,2,25.0,25.1,25.0,24.0,26.0,27.0,...,32.0,32.9,32.8,,,,,17.0,32.9,26.948
2,Dhaka,41923,1981,3,32.0,32.6,30.1,30.0,31.0,33.0,...,24.9,28.5,28.2,28.1,,30.6,27.4,24.9,34.0,30.413793
3,Dhaka,41923,1981,4,26.7,30.6,32.4,31.4,33.2,33.8,...,33.9,33.9,33.3,33.1,29.2,32.7,,20.0,35.4,30.646667
4,Dhaka,41923,1981,5,35.0,30.7,30.9,32.2,31.7,32.2,...,30.6,31.1,30.0,32.2,32.2,31.6,27.8,27.1,35.0,31.883871


In [4]:
# Remove the station identifier columns and the per-row Lowest/Highest/Average
# columns, leaving Year, Month and the D_01..D_31 day columns.
columns_to_drop = ['station_name', 'station_ID', 'Lowest', 'Highest', 'Average']
df_filtered = df.drop(labels=columns_to_drop, axis='columns')

# Preview the top rows of the trimmed table.
df_filtered.head(n=5)


Unnamed: 0,Year,Month,D_01,D_02,D_03,D_04,D_05,D_06,D_07,D_08,...,D_22,D_23,D_24,D_25,D_26,D_27,D_28,D_29,D_30,D_31
0,1981,1,23.9,24.4,24.6,25.6,27.2,26.6,21.1,20.6,...,25.6,26.7,26.1,24.9,26.7,26.8,25.6,24.6,27.8,24.4
1,1981,2,25.0,25.1,25.0,24.0,26.0,27.0,29.0,28.0,...,30.0,31.0,30.4,32.0,32.9,32.8,,,,
2,1981,3,32.0,32.6,30.1,30.0,31.0,33.0,33.0,34.0,...,33.1,,30.9,24.9,28.5,28.2,28.1,,30.6,27.4
3,1981,4,26.7,30.6,32.4,31.4,33.2,33.8,33.4,33.2,...,30.4,32.4,32.1,33.9,33.9,33.3,33.1,29.2,32.7,
4,1981,5,35.0,30.7,30.9,32.2,31.7,32.2,32.7,31.7,...,31.3,33.9,33.9,30.6,31.1,30.0,32.2,32.2,31.6,27.8


In [5]:
# Reshape the wide table in df_filtered (one row per Year/Month with day
# columns D_01..D_31) into a long daily series indexed by calendar date.
time_series = []

for _, row in df_filtered.iterrows():
    year, month = int(row['Year']), int(row['Month'])
    for day in range(1, 32):
        col = f'D_{day:02d}'
        # Skip day columns that are absent or hold no reading.
        if col not in row or pd.isna(row[col]):
            continue
        try:
            date = pd.Timestamp(year=year, month=month, day=day)
        except ValueError:
            # Only impossible calendar dates (e.g. 30 February) are skipped.
            # Anything else (including KeyboardInterrupt) is allowed to
            # surface instead of being silently swallowed.
            continue
        time_series.append((date, row[col]))

# Build the frame in one chain instead of mutating it in place.
daily_df = (
    pd.DataFrame(time_series, columns=['Date', 'MaxTemp'])
    .set_index('Date')
    .sort_index()
)

In [6]:
# Build lag_1..lag_7: MaxTemp shifted by 1..7 rows, i.e. the previous seven
# observations, with lag_1 being the most recent one.
lagged_columns = {f'lag_{k}': daily_df['MaxTemp'].shift(k) for k in range(1, 8)}

# Attach the lag columns and drop rows containing any NaN (the first seven
# rows have incomplete lag history).
daily_df = daily_df.assign(**lagged_columns).dropna()


In [7]:
from sklearn.model_selection import train_test_split

# Features are the seven lag columns (lag_1 first); the target is the same-day value.
lag_feature_names = [f'lag_{i}' for i in range(1, 8)]
X = daily_df[lag_feature_names]
y = daily_df['MaxTemp']

# Chronological split: no shuffling, the final 365*5 rows (about the last
# 5 years) are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=365 * 5,
    shuffle=False,
)


In [8]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Fit a 100-tree random forest with a fixed seed; fit() returns the estimator
# itself, so construction and training are chained.
model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# Score on the held-out rows using mean squared error.
y_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
print("MSE:", test_mse)


MSE: 3.127841409315069


In [9]:
from datetime import timedelta
import numpy as np

# Recursive multi-step forecast: predict one day, append the prediction to the
# history, and use it as an input for the following day.
future_days = 365 * 5

# Feature columns in the order the model was fitted on: lag_1 is the most
# recent observation, lag_7 the oldest.
lag_cols = [f'lag_{i}' for i in range(1, 8)]

# Seed the rolling history with the last 7 observed values (chronological order).
last_known = daily_df.iloc[-7:]['MaxTemp'].values.tolist()

future_preds = []
for _ in range(future_days):
    # The history is oldest-first but the features are newest-first, so the
    # window must be reversed before it is handed to the model.
    window = last_known[-7:][::-1]
    # Named columns keep the input aligned with the fitted feature names.
    x_input = pd.DataFrame([window], columns=lag_cols)
    pred = model.predict(x_input)[0]
    future_preds.append(pred)
    last_known.append(pred)

# Forecast dates start the day after the last observed date.
future_dates = pd.date_range(start=daily_df.index[-1] + timedelta(days=1), periods=future_days)
future_df = pd.DataFrame({'Date': future_dates, 'Forecasted_MaxTemp': future_preds})




In [10]:
# 'Date' is a regular column here, so the default integer index carries no
# information; omit it to avoid a stray unnamed first column in the CSV.
future_df.to_csv('5_years_forecast_max_temp.csv', index=False)


In [11]:
# STEP 1: produce a 5-year forecast starting from 2020-01-01
from datetime import timedelta
import numpy as np

# Locate the start date in the index; get_loc raises KeyError if it is absent.
start_date = pd.Timestamp("2020-01-01")
start_idx = daily_df.index.get_loc(start_date)
if start_idx < 7:
    # A negative slice start would silently yield a wrong or empty window.
    raise ValueError("Need at least 7 observations before the forecast start date")

# Seed the rolling history with the 7 observations preceding the start date
# (chronological order).
last_known = daily_df.iloc[start_idx - 7:start_idx]['MaxTemp'].values.tolist()

future_days = 365 * 5  # 5 years

# Feature columns in the order the model was fitted on: lag_1 is the most
# recent observation, lag_7 the oldest.
lag_cols = [f'lag_{i}' for i in range(1, 8)]

future_preds = []
for _ in range(future_days):
    # The history is oldest-first but the features are newest-first, so the
    # window must be reversed before it is handed to the model.
    window = last_known[-7:][::-1]
    # Named columns keep the input aligned with the fitted feature names.
    x_input = pd.DataFrame([window], columns=lag_cols)
    pred = model.predict(x_input)[0]
    future_preds.append(pred)
    last_known.append(pred)

# STEP 2: build the matching list of dates and index the forecast by date
future_dates = pd.date_range(start=start_date, periods=future_days)
future_df = pd.DataFrame(
    {'Date': future_dates, 'Forecasted_MaxTemp': future_preds}
).set_index('Date')




In [12]:
# Save the forecast to a CSV file; the frame's index is written as the first column.
forecast_csv_name = 'forecast_2020_2024.csv'
future_df.to_csv(forecast_csv_name)