<a href="https://colab.research.google.com/github/nahidhasan0004/ML/blob/main/Daily_Forcast_Using_Random_Forest(RH).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd

# Location of the workbook uploaded to the Colab session.
file_path = "/content/rh dhaka.xlsx"

# Open the workbook once; later cells parse individual sheets from this handle.
excel_data = pd.ExcelFile(file_path)

# Show the worksheet names to see how the workbook is organised.
excel_data.sheet_names


['Sheet1']

In [5]:
# Read the 'Sheet1' worksheet into a DataFrame
# (wide layout: one row per Year/Month, one D_xx column per day of the month).
df = excel_data.parse(sheet_name='Sheet1')

# Preview the top five rows.
df.head(n=5)


Unnamed: 0,Station_Name,Station_ID,Year,Month,D_01,D_02,D_03,D_04,D_05,D_06,...,D_23,D_24,D_25,D_26,D_27,D_28,D_29,D_30,D_31,avg
0,Dhaka,41923,1981,1,68,69,70,70,66,81,...,59,68,68,69,63,61,66.0,64.0,83.0,70
1,Dhaka,41923,1981,2,90,74,61,69,73,74,...,57,54,50,59,59,51,,,,67
2,Dhaka,41923,1981,3,73,59,54,54,47,40,...,68,77,80,66,77,73,77.0,71.0,88.0,65
3,Dhaka,41923,1981,4,85,82,77,79,79,68,...,81,73,63,66,72,67,82.0,71.0,,76
4,Dhaka,41923,1981,5,70,72,74,75,81,69,...,77,74,80,81,85,83,84.0,83.0,94.0,78


In [6]:
# Station identifiers and the 'avg' column (presumably the monthly mean — confirm)
# are not used when building the daily series, so remove them.
columns_to_drop = ['Station_Name', 'Station_ID', 'avg']
df_filtered = df.drop(labels=columns_to_drop, axis='columns')

# Preview the remaining Year / Month / D_01..D_31 columns.
df_filtered.head(n=5)


Unnamed: 0,Year,Month,D_01,D_02,D_03,D_04,D_05,D_06,D_07,D_08,...,D_22,D_23,D_24,D_25,D_26,D_27,D_28,D_29,D_30,D_31
0,1981,1,68,69,70,70,66,81,91,82,...,64,59,68,68,69,63,61,66.0,64.0,83.0
1,1981,2,90,74,61,69,73,74,65,64,...,59,57,54,50,59,59,51,,,
2,1981,3,73,59,54,54,47,40,57,59,...,58,68,77,80,66,77,73,77.0,71.0,88.0
3,1981,4,85,82,77,79,79,68,55,59,...,73,81,73,63,66,72,67,82.0,71.0,
4,1981,5,70,72,74,75,81,69,68,74,...,80,77,74,80,81,85,83,84.0,83.0,94.0


In [7]:
# Reshape the wide table (one row per Year/Month, one D_xx column per day)
# into a long daily series of (date, reading) pairs.
# df_filtered is the filtered table produced by the previous cell.
time_series = []

for _, row in df_filtered.iterrows():
    year, month = int(row['Year']), int(row['Month'])
    for day in range(1, 32):
        col = f'D_{day:02d}'
        # Skip day columns that are absent or hold no reading.
        if col not in row or pd.isna(row[col]):
            continue
        try:
            date = pd.Timestamp(year=year, month=month, day=day)
        except ValueError:
            # Calendar-impossible date (e.g. 30 February): ignore that cell.
            # Only ValueError is caught so unrelated errors are not silently hidden.
            continue
        time_series.append((date, row[col]))

# Index the readings by date and put them in chronological order.
daily_df = (
    pd.DataFrame(time_series, columns=['Date', 'Rh'])
    .set_index('Date')
    .sort_index()
)

In [8]:
# Add the relative-humidity readings of the previous 7 rows as lag_1 ... lag_7
# (lag_1 is the most recent), then drop rows that lack a complete set of values.
# NOTE(review): shift() is positional, so lag_k means "k days earlier" only if
# the date index has no gaps — confirm against the data.
daily_df = daily_df.assign(
    **{f'lag_{lag}': daily_df['Rh'].shift(lag) for lag in range(1, 8)}
).dropna()


In [9]:
from sklearn.model_selection import train_test_split

# Predictors are the seven lagged readings; the target is the same-row reading.
lag_features = [f'lag_{i}' for i in range(1, 8)]
X = daily_df[lag_features]
y = daily_df['Rh']

# Chronological split (no shuffling): the final 365*5 rows (about 5 years) are the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=365*5, shuffle=False
)


In [10]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Fit a 100-tree random forest; the fixed seed keeps the fit reproducible.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

# Evaluate on the held-out rows and report the mean squared error.
y_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
print("MSE:", test_mse)


MSE: 41.28606021917808


In [11]:
from datetime import timedelta
import numpy as np

# Recursive multi-step forecast: every prediction is appended to the history
# and becomes an input for the following step.
future_days = 365 * 8
last_known = daily_df.iloc[-7:]['Rh'].values.tolist()  # oldest -> newest

# The model was fitted on columns lag_1 ... lag_7, where lag_1 is the most
# recent reading and lag_7 the oldest.
lag_columns = [f'lag_{i}' for i in range(1, 8)]

future_preds = []
for _ in range(future_days):
    # last_known is chronological, so the window is reversed to place the
    # newest value under lag_1, matching the training column order.
    # Passing a DataFrame with the training column names also avoids the
    # feature-name warning scikit-learn emits for unnamed arrays on each call.
    window = np.array(last_known[-7:])[::-1].reshape(1, -1)
    x_input = pd.DataFrame(window, columns=lag_columns)
    pred = model.predict(x_input)[0]
    future_preds.append(pred)
    last_known.append(pred)

# Forecast dates start the day after the last observed date.
future_dates = pd.date_range(start=daily_df.index[-1] + timedelta(days=1), periods=future_days)
future_df = pd.DataFrame({'Date': future_dates, 'Forecasted_Rh': future_preds})


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


In [13]:
# Write the forecast table to a CSV file in the working directory.
future_df.to_csv(path_or_buf='8_years_forecast_Rh_temp.csv')


In [11]:
# STEP 1: forecast 5 years starting from 2020-01-01.
from datetime import timedelta
import numpy as np

# The start date must exist in the series so its position can be located.
start_date = pd.Timestamp("2020-01-01")
if start_date not in daily_df.index:
    raise KeyError(f"{start_date.date()} is not present in daily_df.index")
start_idx = daily_df.index.get_loc(start_date)

# Seven earlier observations are required to seed the lag window; a smaller
# start_idx would make the slice below start at a negative position.
if start_idx < 7:
    raise ValueError("need at least 7 observations before start_date to seed the forecast")

# Seed with the 7 observed readings immediately before the start date (oldest -> newest).
# daily_df stores the readings in the 'Rh' column; it has no 'MaxTemp' column.
last_known = daily_df.iloc[start_idx - 7:start_idx]['Rh'].values.tolist()

future_days = 365 * 5  # 5 years

# The model was fitted on columns lag_1 ... lag_7, where lag_1 is the most
# recent reading and lag_7 the oldest.
lag_columns = [f'lag_{i}' for i in range(1, 8)]

future_preds = []
for _ in range(future_days):
    # Reverse the chronological window so the newest value lands under lag_1,
    # and keep the training column names so scikit-learn does not warn about
    # missing feature names on every call.
    window = np.array(last_known[-7:])[::-1].reshape(1, -1)
    x_input = pd.DataFrame(window, columns=lag_columns)
    pred = model.predict(x_input)[0]
    future_preds.append(pred)
    last_known.append(pred)

# STEP 2: build the matching list of dates.
future_dates = pd.date_range(start=start_date, periods=future_days)
# The column is labelled as relative humidity, consistent with the source series.
future_df = pd.DataFrame(
    {'Date': future_dates, 'Forecasted_Rh': future_preds}
).set_index('Date')




In [12]:
# Save the forecast to a CSV file.
future_df.to_csv(path_or_buf='forecast_2020_2024.csv')