In [43]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error

In [44]:
# Load data: read the Netflix stock workbook into a DataFrame
file_path = r'C:\Users\kamrul\Desktop\Thesis\Random Forest/Netflix Data - STOCK.xlsx'  # absolute local path to the Excel file
data = pd.read_excel(io=file_path)


In [45]:
# Data cleaning: remove the two stray 'Unnamed' columns when they exist
# (errors='ignore' makes this a no-op otherwise), then discard every row
# that still has a missing value.
data_cleaned = (
    data
    .drop(columns=['Unnamed: 10', 'Unnamed: 11'], errors='ignore')
    .dropna()
)


In [46]:
# Features and target
features = ['Open', 'High', 'Low', 'Volume']  # predictor columns fed to the model
target = 'Close'  # column the regressor learns to predict

# Build X / y from the cleaned frame so that the rows removed during data
# cleaning (rows with missing values) never reach the model. Selecting from
# the raw `data` frame would leave the cleaning step without any effect.
X = data_cleaned[features]
y = data_cleaned[target]

In [47]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [48]:

# Random Forest regressor: 100 trees, seeded so repeated fits give the same forest
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

In [49]:
# Fit the forest on the training portion of the data
model.fit(X=X_train, y=y_train)


In [50]:
# Predict Close for the held-out rows
y_pred = model.predict(X=X_test)


In [51]:
# Regression quality: mean squared error between the true and predicted Close
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")


Mean Squared Error: 21.861218733921678


In [52]:
# Classification metrics
# The regression output is turned into a binary "above / below threshold"
# label. True and predicted prices are compared against the SAME cut-off
# (the mean of the true test prices): thresholding each series at its own
# mean would put the decision boundary in a different place for the labels
# and for the predictions, so the two label sets would not be comparable.
# NOTE: these scores only say whether the model places a price on the correct
# side of the cut-off; they are not a measure of price-direction forecasting.
price_threshold = y_test.mean()
y_test_class = (y_test > price_threshold).astype(int)
y_pred_class = (y_pred > price_threshold).astype(int)

accuracy = accuracy_score(y_test_class, y_pred_class)
precision = precision_score(y_test_class, y_pred_class)
recall = recall_score(y_test_class, y_pred_class)
f1 = f1_score(y_test_class, y_pred_class)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")


Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-Score: 1.0


In [53]:
# Function to create future dates for the next 6 months
def create_future_dates(start_date, months=6):
    """Build a one-column frame of monthly dates beginning at ``start_date``.

    Dates are spaced by calendar months rather than fixed 30-day steps, so the
    day-of-month is preserved (1 Oct -> 1 Nov -> 1 Dec ...). Each date is
    computed from ``start_date`` directly, so a month-end start such as
    31 Jan yields 29 Feb / 31 Mar instead of drifting after the first clamp.

    Parameters
    ----------
    start_date : datetime.datetime or pandas.Timestamp
        First date of the series; it is included as the first row.
    months : int, default 6
        Number of rows (monthly steps) to generate.

    Returns
    -------
    pandas.DataFrame
        Frame with a single ``'Date'`` column holding ``months`` entries.
    """
    first = pd.Timestamp(start_date)
    future_dates = [first + pd.DateOffset(months=step) for step in range(months)]
    return pd.DataFrame({'Date': future_dates})
# Function to predict future prices
def predict_future_prices(model, start_day, start_month, start_year, random_state=None, history=None):
    """Predict Close prices for the dates produced by ``create_future_dates``.

    The Open/High/Low/Volume inputs are NOT forecasts: they are placeholder
    values drawn uniformly between the minimum and maximum of each column in
    ``history``. Replace them with real feature estimates before relying on
    the predicted prices.

    Parameters
    ----------
    model : fitted estimator
        Object exposing ``predict`` that accepts the columns
        ``['Open', 'High', 'Low', 'Volume']`` in that order.
    start_day, start_month, start_year : int
        Components of the first prediction date.
    random_state : int or None, default None
        Seed for the placeholder draws. Pass an integer to get the same
        placeholders (and therefore the same predictions) on every call;
        ``None`` gives fresh random values each time.
    history : pandas.DataFrame or None, default None
        Frame supplying the value ranges for the placeholders. When omitted,
        the notebook-level ``data`` frame is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``Open``, ``High``, ``Low``, ``Predicted_Close``.

    Raises
    ------
    ValueError
        If the day/month/year combination is not a valid calendar date.
    """
    # Start date for prediction
    start_date = datetime(year=start_year, month=start_month, day=start_day)

    # Create future dates for the next 6 months
    future_data = create_future_dates(start_date)

    if history is None:
        history = data  # fall back to the frame loaded earlier in the notebook

    # A dedicated generator keeps the draws reproducible when a seed is given
    # and avoids touching NumPy's global random state.
    rng = np.random.default_rng(random_state)
    n_rows = len(future_data)

    def _draw(column):
        # Uniform placeholder values spanning the observed range of `column`.
        return rng.uniform(history[column].min(), history[column].max(), size=n_rows)

    open_draw = _draw('Open')
    high_draw = _draw('High')
    low_draw = _draw('Low')

    # Independent draws can yield Low > High, a row no real trading day can
    # produce. Take the row-wise extremes so that Low <= Open <= High holds.
    future_data['Open'] = open_draw
    future_data['High'] = np.maximum.reduce([open_draw, high_draw, low_draw])
    future_data['Low'] = np.minimum.reduce([open_draw, high_draw, low_draw])
    future_data['Volume'] = _draw('Volume')

    # Predict future Close prices (same column order as used for training)
    feature_columns = ['Open', 'High', 'Low', 'Volume']
    future_data['Predicted_Close'] = model.predict(future_data[feature_columns])

    return future_data[['Date', 'Open', 'High', 'Low', 'Predicted_Close']]
# Function to take user input and provide predictions
def user_input_prediction():
    """Ask for a start date on stdin and return the predicted prices.

    Prompts for day, month and year (each parsed with ``int``), then runs
    ``predict_future_prices`` with the notebook-level ``model``.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``predict_future_prices``. Returning it (rather
        than discarding it) lets the calling cell display or reuse the result.

    Raises
    ------
    ValueError
        If an entry is not an integer or the values do not form a valid date.
    """
    start_day = int(input("Enter the day: "))
    start_month = int(input("Enter the month: "))
    start_year = int(input("Enter the year: "))

    # Predict future stock prices
    predictions = predict_future_prices(model, start_day, start_month, start_year)
    return predictions

In [54]:
# Prompt for a start date (day, month, year) on stdin and run the
# future-price prediction from that date.
user_input_prediction()

Enter the day:  1
Enter the month:  10
Enter the year:  2024
