# Logistic Regression model against IHSG price

In [None]:
import sys
!{sys.executable} -m pip install -r requirements_notebook.txt -r requirements_modeling.txt




[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [129]:
import json
import pandas as pd
from argparse import ArgumentParser

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from synchronize_data import (
    fetch_ihsg_data,
    fetch_daily_data,
    fetch_mcap_data,
    fetch_idr_usd_rate,
    fetch_temp_bonds_rate,
    fetch_idr_interest_rate,
)
from fear_and_greed import FearAndGreedIndex, normalize_data

# Preparing the data

In [130]:
timeframe: int = 380
avg_period: int = 7

### Fear and Greed Index Data

In [131]:
daily_data = fetch_daily_data(timeframe)
mcap_data = fetch_mcap_data(timeframe)
exchange_rate_data = fetch_idr_usd_rate(timeframe)
interest_data = fetch_idr_interest_rate()
bonds_data = fetch_temp_bonds_rate(timeframe)

with open("parameters/average_methods.json") as f:
    moving_avg_methods = json.load(f)

fear_and_greed_index = FearAndGreedIndex(
    daily_data, mcap_data, exchange_rate_data, interest_data, bonds_data
)
fear_and_greed_index.set_moving_avg_method(moving_avg_methods)
fear_and_greed_data = fear_and_greed_index.calculate_fear_and_greed_index(
    timeframe, avg_period
)

fear_and_greed_data.tail(10)

https://openexchangerates.org/api/historical/2024-11-29.json
https://investing.com/rates-bonds/indonesia-10-year-bond-yield-historical-data


Unnamed: 0_level_0,momentum,strength,volatility,volume_breadth,safe_haven,exchange_rate,interest_rate,buffett,fear_and_greed_index
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-11-14,40.811577,31.210789,18.375322,40.164515,60.271459,43.941022,25.0,71.50842,41.410388
2024-11-15,40.23021,31.876159,17.406516,41.977078,60.298278,47.53617,25.0,70.769165,41.886697
2024-11-18,40.247553,32.170882,17.78598,52.641624,60.026661,55.489642,25.0,70.417127,44.222434
2024-11-19,42.119539,33.090038,18.90962,59.103771,58.349189,56.255098,25.0,69.884525,45.338973
2024-11-20,42.336065,34.020766,18.573896,53.804367,59.070881,56.552572,50.0,69.138966,47.937189
2024-11-21,41.498087,32.980743,18.404542,44.616025,60.744639,57.364644,50.0,67.923623,46.691538
2024-11-22,42.721039,33.412094,17.669958,58.507687,58.55308,56.705185,50.0,67.469384,48.129803
2024-11-25,44.400193,35.423016,19.207896,66.099464,61.571328,58.103357,50.0,67.92152,50.340847
2024-11-26,42.60845,35.190719,18.761959,65.235932,58.034171,58.510593,50.0,68.264341,49.575771
2024-11-28,41.355113,34.805623,20.14275,56.323517,57.618327,59.013804,50.0,68.59293,48.481508


### IHSG Data

In [132]:
# Fetch IHSG data as the target variable
ihsg_data = fetch_ihsg_data(timeframe)
ihsg_data["change"] = ihsg_data["price"].pct_change()
ihsg_data["change_scaled"] = normalize_data(ihsg_data["change"], scale=(0, 0))


ihsg_data.tail(10)

Unnamed: 0,index_code,date,price,change,change_scaled
240,IHSG,2024-11-14,7214.56,-0.012876,41.40665
241,IHSG,2024-11-15,7161.26,-0.007388,52.166014
242,IHSG,2024-11-18,7134.28,-0.003767,59.262951
243,IHSG,2024-11-19,7195.71,0.008611,83.527527
244,IHSG,2024-11-20,7180.33,-0.002137,62.458442
245,IHSG,2024-11-21,7140.91,-0.00549,55.886336
246,IHSG,2024-11-22,7195.56,0.007653,81.650637
247,IHSG,2024-11-25,7314.11,0.016475,98.945034
248,IHSG,2024-11-26,7245.89,-0.009327,48.36434
249,IHSG,2024-11-28,7200.16,-0.006311,54.27661


## Prepare the data for modeling

In [133]:
# Shift the date of fear and greed index forward by 1 day
# to simulate prediction of IHSG
fear_and_greed = fear_and_greed_data.copy()

fear_and_greed = fear_and_greed.dropna()
fear_and_greed.index = pd.to_datetime(fear_and_greed.index)
fear_and_greed.index = fear_and_greed.index.shift(1, freq="D")

fear_and_greed.tail(10)

Unnamed: 0_level_0,momentum,strength,volatility,volume_breadth,safe_haven,exchange_rate,interest_rate,buffett,fear_and_greed_index
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-11-15,40.811577,31.210789,18.375322,40.164515,60.271459,43.941022,25.0,71.50842,41.410388
2024-11-16,40.23021,31.876159,17.406516,41.977078,60.298278,47.53617,25.0,70.769165,41.886697
2024-11-19,40.247553,32.170882,17.78598,52.641624,60.026661,55.489642,25.0,70.417127,44.222434
2024-11-20,42.119539,33.090038,18.90962,59.103771,58.349189,56.255098,25.0,69.884525,45.338973
2024-11-21,42.336065,34.020766,18.573896,53.804367,59.070881,56.552572,50.0,69.138966,47.937189
2024-11-22,41.498087,32.980743,18.404542,44.616025,60.744639,57.364644,50.0,67.923623,46.691538
2024-11-23,42.721039,33.412094,17.669958,58.507687,58.55308,56.705185,50.0,67.469384,48.129803
2024-11-26,44.400193,35.423016,19.207896,66.099464,61.571328,58.103357,50.0,67.92152,50.340847
2024-11-27,42.60845,35.190719,18.761959,65.235932,58.034171,58.510593,50.0,68.264341,49.575771
2024-11-29,41.355113,34.805623,20.14275,56.323517,57.618327,59.013804,50.0,68.59293,48.481508


In [134]:
ihsg_copy = ihsg_data.copy()
ihsg_copy = ihsg_copy.set_index("date")
ihsg_copy.index = pd.to_datetime(ihsg_copy.index)

ihsg_copy.tail(10)

Unnamed: 0_level_0,index_code,price,change,change_scaled
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-11-14,IHSG,7214.56,-0.012876,41.40665
2024-11-15,IHSG,7161.26,-0.007388,52.166014
2024-11-18,IHSG,7134.28,-0.003767,59.262951
2024-11-19,IHSG,7195.71,0.008611,83.527527
2024-11-20,IHSG,7180.33,-0.002137,62.458442
2024-11-21,IHSG,7140.91,-0.00549,55.886336
2024-11-22,IHSG,7195.56,0.007653,81.650637
2024-11-25,IHSG,7314.11,0.016475,98.945034
2024-11-26,IHSG,7245.89,-0.009327,48.36434
2024-11-28,IHSG,7200.16,-0.006311,54.27661


In [135]:
merged_data = fear_and_greed.join([ihsg_copy])

merged_data.tail(10)

Unnamed: 0_level_0,momentum,strength,volatility,volume_breadth,safe_haven,exchange_rate,interest_rate,buffett,fear_and_greed_index,index_code,price,change,change_scaled
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2024-11-15,40.811577,31.210789,18.375322,40.164515,60.271459,43.941022,25.0,71.50842,41.410388,IHSG,7161.26,-0.007388,52.166014
2024-11-16,40.23021,31.876159,17.406516,41.977078,60.298278,47.53617,25.0,70.769165,41.886697,,,,
2024-11-19,40.247553,32.170882,17.78598,52.641624,60.026661,55.489642,25.0,70.417127,44.222434,IHSG,7195.71,0.008611,83.527527
2024-11-20,42.119539,33.090038,18.90962,59.103771,58.349189,56.255098,25.0,69.884525,45.338973,IHSG,7180.33,-0.002137,62.458442
2024-11-21,42.336065,34.020766,18.573896,53.804367,59.070881,56.552572,50.0,69.138966,47.937189,IHSG,7140.91,-0.00549,55.886336
2024-11-22,41.498087,32.980743,18.404542,44.616025,60.744639,57.364644,50.0,67.923623,46.691538,IHSG,7195.56,0.007653,81.650637
2024-11-23,42.721039,33.412094,17.669958,58.507687,58.55308,56.705185,50.0,67.469384,48.129803,,,,
2024-11-26,44.400193,35.423016,19.207896,66.099464,61.571328,58.103357,50.0,67.92152,50.340847,IHSG,7245.89,-0.009327,48.36434
2024-11-27,42.60845,35.190719,18.761959,65.235932,58.034171,58.510593,50.0,68.264341,49.575771,,,,
2024-11-29,41.355113,34.805623,20.14275,56.323517,57.618327,59.013804,50.0,68.59293,48.481508,,,,


In [136]:
merged_data = merged_data.dropna()

x = merged_data[
    [
        "momentum",
        "strength",
        "volatility",
        "volume_breadth",
        "safe_haven",
        "exchange_rate",
        "interest_rate",
        "buffett",
    ]
]

y = merged_data["change"].apply(
    lambda x: 1 if x > 0 else 0
)

In [137]:
x.tail(5)

Unnamed: 0_level_0,momentum,strength,volatility,volume_breadth,safe_haven,exchange_rate,interest_rate,buffett
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-11-19,40.247553,32.170882,17.78598,52.641624,60.026661,55.489642,25.0,70.417127
2024-11-20,42.119539,33.090038,18.90962,59.103771,58.349189,56.255098,25.0,69.884525
2024-11-21,42.336065,34.020766,18.573896,53.804367,59.070881,56.552572,50.0,69.138966
2024-11-22,41.498087,32.980743,18.404542,44.616025,60.744639,57.364644,50.0,67.923623
2024-11-26,44.400193,35.423016,19.207896,66.099464,61.571328,58.103357,50.0,67.92152


In [138]:
y.tail(5)

date
2024-11-19    1
2024-11-20    0
2024-11-21    0
2024-11-22    1
2024-11-26    0
Name: change, dtype: int64

## Modeling

In [151]:
# Split the data for modeling
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [152]:
# Train the logistic regression model

model = LogisticRegression()
model.fit(x_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [153]:
# Extract Coefficients
coefficients = pd.DataFrame({'Feature': x.columns, 'Coefficient': model.coef_[0]})
print(coefficients)

          Feature  Coefficient
0        momentum    -0.080848
1        strength     0.060188
2      volatility    -0.004500
3  volume_breadth     0.003061
4      safe_haven     0.017326
5   exchange_rate     0.008136
6   interest_rate    -0.010611
7         buffett    -0.010629


In [154]:
train_score = model.score(x_train, y_train)
test_score = model.score(x_test, y_test)

print(f"training set score: {train_score}")
print(f"test set score: {test_score}")

training set score: 0.5135135135135135
test set score: 0.5263157894736842
