In [1]:
# Provides ways to work with large multidimensional arrays
import numpy as np 
# Allows for further data manipulation and analysis
import pandas as pd
from pandas_datareader import data as web # Reads stock data 
import matplotlib.pyplot as plt # Plotting
import matplotlib.dates as mdates # Styling dates
%matplotlib inline

import datetime as dt # For defining dates
import mplfinance as mpf # Matplotlib finance

import time

# Used to get data from a directory
import os
from os import listdir
from os.path import isfile, join
import random

#Statsmodels is a great library we can use to run regressions.
import statsmodels.api as sm
# Seaborn extends the capabilities of Matplotlib
import seaborn as sns
# Used for calculating regressions
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

In [3]:
# Risk-free rate subtracted from the annualised portfolio return when the
# Sharpe ratio is computed further down
risk_free_rate = 0.0125
# Folder holding one '<ticker>.csv' file per stock. The trailing separator is
# required: file names are built by plain concatenation (PATH + ticker + '.csv').
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere
PATH = 'E:\\Projects\\Finoobs\\Portfolio Optimization\\stocks\\'

In [4]:
def get_df_from_csv(ticker):
    """Load the price history stored for ``ticker``.

    The file is looked up as ``PATH + ticker + '.csv'`` and read with the
    'Date' column parsed and used as the index.

    Returns:
        The DataFrame read from disk, or None when the file does not exist.
    """
    csv_path = PATH + ticker + '.csv'
    try:
        return pd.read_csv(csv_path, index_col='Date', parse_dates=True)
    except FileNotFoundError:
        # Missing files are reported to the caller as None rather than raised
        return None

def merge_df_by_column_name(col_name, sdate, edate, *tickers):
    """Collect one column from several per-ticker CSV files into a single frame.

    Args:
        col_name: name of the column to extract from every ticker's CSV.
        sdate: first date to keep (inclusive); anything pd.to_datetime accepts.
        edate: last date to keep (inclusive); anything pd.to_datetime accepts.
        *tickers: ticker names, each resolved to a CSV by get_df_from_csv.

    Returns:
        DataFrame with one column per ticker, in the order given.

    Raises:
        FileNotFoundError: if get_df_from_csv finds no file for a ticker.

    Note:
        The first ticker assigned fixes the row index. pandas aligns every
        later column to that index on assignment, so dates missing from the
        first ticker are dropped and dates missing from later tickers are NaN.
    """
    # Will hold data for all dataframes with the same column name
    mult_df = pd.DataFrame()
    sdate = pd.to_datetime(sdate)
    edate = pd.to_datetime(edate)

    for x in tickers:
        df = get_df_from_csv(x)
        if df is None:
            # get_df_from_csv signals a missing file with None; fail here with a
            # clear message instead of an AttributeError on `df.index` below
            raise FileNotFoundError(f"No CSV file found for ticker {x!r}")
        df.index = pd.to_datetime(df.index)
        mask = (df.index >= sdate) & (df.index <= edate)
        # Single .loc call selects rows and column together (no chained indexing)
        mult_df[x] = df.loc[mask, col_name]

    return mult_df

def get_file_list(folder_path):
    """Return the names of the regular files directly inside ``folder_path``.

    Sub-directories are skipped; names are returned without the folder prefix.
    """
    file_names = []
    for entry in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, entry)):
            file_names.append(entry)
    return file_names

def get_random_files(folder_path, num_files=None):
    """Randomly pick CSV files from ``folder_path`` and return their base names.

    Only files whose extension is '.csv' (case-insensitive) are considered, and
    the extension is removed with os.path.splitext. Other files in the folder
    are ignored instead of being truncated by four characters into names that
    get_df_from_csv could never find.

    Args:
        folder_path: directory to scan (non-recursive).
        num_files: how many names to return. None, or a value larger than the
            number of CSV files available, selects all of them.

    Returns:
        List of file names without the '.csv' extension, in random order.

    Raises:
        ValueError: from random.sample if ``num_files`` is negative.
    """
    stems = []
    for file_name in get_file_list(folder_path):
        stem, extension = os.path.splitext(file_name)
        if extension.lower() == '.csv':
            stems.append(stem)

    # If num_files is None, select all files
    if num_files is None or num_files > len(stems):
        num_files = len(stems)

    return random.sample(stems, num_files)


In [5]:
num_files_to_select = 11

# Seed the sampler so a fresh "Restart & Run All" picks the same tickers and the
# outputs below stay consistent with each other.
# Set PORTFOLIO_SEED to None to draw a different random portfolio on every run.
PORTFOLIO_SEED = 42
random.seed(PORTFOLIO_SEED)

port_list = get_random_files(PATH, num_files_to_select)

print("Randomly selected files:")
print(port_list)

Randomly selected files:
['AGROPHOS', 'ECOSMOBLTY', 'TVSMOTOR', 'AVANTIFEED', 'S&SPOWER', 'JTEKTINDIA', 'APLAPOLLO', 'RELIABLE', 'TCI', 'UMAEXPORTS', 'HLVLTD']


In [30]:
# Adjusted close prices of the selected tickers over the analysis window
START_DATE, END_DATE = '2019-01-01', '2024-09-19'
mult_df = merge_df_by_column_name('Adj Close', START_DATE, END_DATE, *port_list)
mult_df

Unnamed: 0_level_0,UPL,NIITTECH,SUMICHEM,AMARAJABAT,CENTURYTEX,MRF,GRINDWELL,NLCINDIA,MGL,SYNGENE,PNB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-01-01,471.213684,,,,893.593872,66200.500000,527.644226,50.252857,793.661682,277.367249,76.188187
2019-01-02,473.089783,,,,877.318481,66036.296875,523.020996,49.527180,783.049255,274.755066,74.326447
2019-01-03,466.929901,,,,870.101135,66217.398438,522.690857,49.382046,774.916077,272.416504,74.326447
2019-01-04,473.027222,,,,879.982605,66004.234375,524.719360,49.454617,781.483521,270.898895,77.620300
2019-01-07,477.623657,,,,876.882629,66376.257812,527.880066,49.563465,787.876953,274.083313,76.904236
...,...,...,...,...,...,...,...,...,...,...,...
2024-09-12,614.849976,,591.599976,,2783.100098,135962.250000,2463.100098,264.068420,1829.849976,917.150024,108.720001
2024-09-13,611.400024,,591.799988,,2896.100098,137353.953125,2469.500000,268.294708,1798.650024,923.849976,111.110001
2024-09-16,613.799988,,592.349976,,2860.850098,136427.000000,2435.550049,267.350006,1821.400024,938.200012,110.809998
2024-09-17,610.650024,,613.500000,,2787.949951,135219.203125,2397.699951,271.850006,1909.949951,910.599976,108.029999


In [None]:
# Rebase every ticker to 100 at its own first available price. Dividing by the
# first row alone turns any ticker that has no price on the first date into an
# all-NaN line that silently disappears from the chart.
first_valid_price = mult_df.bfill().iloc[0]
ax = (mult_df / first_valid_price * 100).plot(figsize=(16, 9))
ax.set(title='Price performance (first available price = 100)',
       xlabel='Date', ylabel='Rebased price');

In [None]:
# Force every price column to a numeric dtype; cells that cannot be parsed become NaN
mult_df = mult_df.apply(pd.to_numeric, errors='coerce')

# Daily logarithmic returns: ln(price_t / price_{t-1})
price_ratio = mult_df.div(mult_df.shift(1))
returns = np.log(price_ratio)

# Scale the mean daily return by 252, the average number of trading days per year
mean_ret = returns.mean().mul(252)
mean_ret

In [None]:
# Covariance matrix of the daily log returns, scaled by 252 trading days
returns.cov().mul(252)

In [None]:
# Pairwise Pearson correlation between the tickers' daily log returns
returns.corr(method='pearson')


In [None]:
# Generate one random weight per asset (per column of `returns`) and normalise
# them so they sum to 1. Taking the count from the data keeps the weight vector
# the same length as returns.mean() even if the number of tickers changes.
n_assets = returns.shape[1]
weights = np.random.random(n_assets)
weights /= np.sum(weights)
print('Weights :', weights)
print('Total Weight :', np.sum(weights))

In [None]:
# Annualised return of the randomly weighted portfolio over the whole dataset:
# weight each ticker's mean daily log return, add them up, scale by 252 days
weighted_daily_means = weights * returns.mean()
np.sum(weighted_daily_means) * 252

In [None]:
# Annualised portfolio volatility: sqrt(w^T . (252 * Cov) . w)
annual_cov = returns.cov() * 252
portfolio_variance = np.dot(weights.T, np.dot(annual_cov, weights))
np.sqrt(portfolio_variance)

In [None]:
p_ret = [] # Returns list
p_vol = [] # Volatility list
p_SR = [] # Sharpe Ratio list
p_wt = [] # Stock weights list

# One weight per column of `returns`, so the simulation follows the data
# instead of a hard-coded asset count
n_assets = returns.shape[1]

# These statistics do not depend on the weights: compute them once instead of
# once per simulated portfolio
mean_daily_ret = returns.mean()
annual_cov = returns.cov() * 252

for sim in range(10000):
    # Generate random weights that sum to 1
    p_weights = np.random.random(n_assets)
    p_weights /= np.sum(p_weights)

    # Add annualised return using those weights to list
    ret_1 = np.sum(p_weights * mean_daily_ret) * 252
    p_ret.append(ret_1)

    # Add annualised volatility (standard deviation) to list
    vol_1 = np.sqrt(np.dot(p_weights.T, np.dot(annual_cov, p_weights)))
    p_vol.append(vol_1)

    # Get Sharpe ratio
    SR_1 = (ret_1 - risk_free_rate) / vol_1
    p_SR.append(SR_1)

    # Store the weights for each portfolio
    p_wt.append(p_weights)

# Convert to Numpy arrays
p_ret = np.array(p_ret)
p_vol = np.array(p_vol)
p_SR = np.array(p_SR)
p_wt = np.array(p_wt)

p_ret, p_vol, p_SR, p_wt

In [None]:
# Collect the simulated portfolios in a frame: one row per portfolio
ports = pd.DataFrame({'Return': p_ret, 'Volatility': p_vol})

# Risk/return scatter of every simulated portfolio
ports.plot.scatter(x='Volatility', y='Return', figsize=(16, 9))

In [None]:
# Return the index of the largest Sharpe Ratio
SR_idx = np.argmax(p_SR)

# Print the weighting of the portfolio at that index. The index computed above
# is used directly: a literal index only matches the random draw of the run it
# was copied from. Labels come from the columns of `returns`, i.e. the data the
# weights were simulated on.
for ticker_name, weight in zip(returns.columns, p_wt[SR_idx]):
    print("Stock : %s : %2.2f" % (ticker_name, weight * 100))

# Find volatility of that portfolio
print("\nVolatility :", p_vol[SR_idx])

# Find return of that portfolio
print("Return :", p_ret[SR_idx])

In [22]:
import numpy as np
import pandas as pd
import datetime as dt
import random
import yfinance as yf

# Folder holding one '<ticker>.csv' file per stock. The trailing separator is
# required: file names are built by plain concatenation (PATH + ticker + '.csv').
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere
PATH = 'E:\\Projects\\Finoobs\\Portfolio Optimization\\stocks\\'
# Risk-free rate subtracted from the annualised portfolio return in the Sharpe ratio
risk_free_rate = 0.0125

def get_df_from_csv(ticker):
    """Read ``PATH + ticker + '.csv'`` into a DataFrame indexed by parsed 'Date'.

    Returns:
        The loaded DataFrame, or None if no such file exists.
    """
    try:
        frame = pd.read_csv(PATH + ticker + '.csv', index_col='Date', parse_dates=True)
    except FileNotFoundError:
        # A missing file is not an error here; the caller receives None
        return None
    return frame

def merge_df_by_column_name(col_name, sdate, edate, *tickers):
    """Collect one column from several per-ticker CSV files into a single frame.

    Args:
        col_name: name of the column to extract from every ticker's CSV.
        sdate: first date to keep (inclusive); anything pd.to_datetime accepts.
        edate: last date to keep (inclusive); anything pd.to_datetime accepts.
        *tickers: ticker names, each resolved to a CSV by get_df_from_csv.

    Returns:
        DataFrame with one column per ticker, in the order given.

    Raises:
        FileNotFoundError: if get_df_from_csv finds no file for a ticker.

    Note:
        The first ticker assigned fixes the row index. pandas aligns every
        later column to that index on assignment, so dates missing from the
        first ticker are dropped and dates missing from later tickers are NaN.
    """
    mult_df = pd.DataFrame()
    sdate = pd.to_datetime(sdate)
    edate = pd.to_datetime(edate)

    for x in tickers:
        df = get_df_from_csv(x)
        if df is None:
            # get_df_from_csv signals a missing file with None; fail here with a
            # clear message instead of an AttributeError on `df.index` below
            raise FileNotFoundError(f"No CSV file found for ticker {x!r}")
        df.index = pd.to_datetime(df.index)
        mask = (df.index >= sdate) & (df.index <= edate)
        # Single .loc call selects rows and column together (no chained indexing)
        mult_df[x] = df.loc[mask, col_name]

    return mult_df

def get_random_files(folder_path, num_files=None):
    """Randomly pick CSV files from ``folder_path`` and return their base names.

    Only regular files whose extension is '.csv' (case-insensitive) are
    considered, and the extension is removed with os.path.splitext. Other files
    in the folder are ignored instead of being truncated by four characters
    into names that get_df_from_csv could never find.

    Args:
        folder_path: directory to scan (non-recursive).
        num_files: how many names to return. None, or a value larger than the
            number of CSV files available, selects all of them.

    Returns:
        List of file names without the '.csv' extension, in random order.

    Raises:
        ValueError: from random.sample if ``num_files`` is negative.
    """
    stems = []
    for file_name in os.listdir(folder_path):
        if not os.path.isfile(os.path.join(folder_path, file_name)):
            continue
        stem, extension = os.path.splitext(file_name)
        if extension.lower() == '.csv':
            stems.append(stem)

    if num_files is None or num_files > len(stems):
        num_files = len(stems)

    return random.sample(stems, num_files)

def _simulate_portfolios(returns, n_portfolios, rf_rate):
    """Simulate randomly weighted portfolios from a frame of daily log returns.

    Args:
        returns: DataFrame of log returns, one column per asset.
        n_portfolios: number of random weight vectors to draw.
        rf_rate: rate subtracted from the annualised return in the Sharpe ratio.

    Returns:
        Tuple of NumPy arrays (annualised returns, annualised volatilities,
        Sharpe ratios, weight vectors), each with one entry per portfolio.
    """
    # One weight per column, so the vector always matches the data
    n_assets = returns.shape[1]

    # Independent of the weights: compute once, not once per portfolio
    mean_daily_ret = returns.mean()
    annual_cov = returns.cov() * 252

    p_ret, p_vol, p_SR, p_wt = [], [], [], []
    for _ in range(n_portfolios):
        weights = np.random.random(n_assets)
        weights /= np.sum(weights)

        ret = np.sum(weights * mean_daily_ret) * 252
        vol = np.sqrt(np.dot(weights.T, np.dot(annual_cov, weights)))
        sr = (ret - rf_rate) / vol

        p_ret.append(ret)
        p_vol.append(vol)
        p_SR.append(sr)
        p_wt.append(weights)

    return np.array(p_ret), np.array(p_vol), np.array(p_SR), np.array(p_wt)


def _fetch_current_prices(symbols):
    """Look up 'currentPrice' for every symbol through yfinance.

    A symbol whose lookup raises KeyError, or whose info has no 'currentPrice',
    is stored as NaN so that every symbol has an entry in the result.
    """
    current_prices = {}
    for symbol in symbols:
        try:
            price = yf.Ticker(symbol).info.get('currentPrice', np.nan)
        except KeyError:
            print(f"Price data for {symbol} is not available.")
            price = np.nan
        current_prices[symbol] = price
    return current_prices


def _allocate_shares(symbols, weights, prices, investment_amount):
    """Turn target weights into whole-share purchase recommendations.

    A symbol without a usable price (missing, None, NaN, non-numeric, zero or
    negative) receives 0 shares instead of aborting the whole allocation.

    Returns:
        Tuple (recommendations, total_investment): a list of dicts with keys
        'stock', 'weight', 'shares' and 'investment', and the sum of the
        'investment' values.
    """
    recommendations = []
    total_investment = 0
    for symbol, weight in zip(symbols, weights):
        try:
            price = float(prices.get(symbol))
        except (TypeError, ValueError):
            price = np.nan

        if not np.isfinite(price) or price <= 0:
            print(f"Skipping {symbol}: no usable current price.")
            shares, actual_investment = 0, 0.0
        else:
            amount = investment_amount * weight
            # Whole shares only: truncate the fractional part
            shares = int(amount / price)
            actual_investment = shares * price

        total_investment += actual_investment
        recommendations.append({
            'stock': symbol,
            'weight': weight,
            'shares': shares,
            'investment': actual_investment
        })
    return recommendations, total_investment


def optimize_portfolio(investment_amount):
    """Pick random stocks, find the best-Sharpe random weighting and size the orders.

    Up to 11 tickers are sampled from the CSV folder at PATH. Their
    'Adj Close' history from 2019-01-01 until today is turned into daily log
    returns, 10000 random weightings are simulated, and the one with the
    highest Sharpe ratio is converted into whole-share purchases at the current
    prices reported by yfinance for the '<ticker>.NS' symbols.

    Args:
        investment_amount: total amount of money to allocate.

    Returns:
        Tuple (recommendations, expected_return, volatility, total_investment)
        where recommendations is a list of dicts with keys 'stock', 'weight',
        'shares' and 'investment'.

    Raises:
        ValueError: if no stock files are found under PATH.
    """
    num_files_to_select = 11
    tickers = get_random_files(PATH, num_files_to_select)
    if not tickers:
        raise ValueError(f"No stock files found in {PATH}")

    end_date = dt.datetime.now().strftime('%Y-%m-%d')
    mult_df = merge_df_by_column_name('Adj Close', '2019-01-01', end_date, *tickers)
    mult_df = mult_df.apply(pd.to_numeric, errors='coerce')

    returns = np.log(mult_df / mult_df.shift(1))

    # Symbols queried through yfinance carry the '.NS' suffix
    port_list = [ticker + ".NS" for ticker in tickers]
    print(port_list)

    p_ret, p_vol, p_SR, p_wt = _simulate_portfolios(returns, 10000, risk_free_rate)

    optimal_idx = np.argmax(p_SR)
    optimal_weights = p_wt[optimal_idx]

    # Get current prices
    current_prices = _fetch_current_prices(port_list)
    print(current_prices)

    # Calculate number of shares and investment amount for each stock
    recommendations, total_investment = _allocate_shares(
        port_list, optimal_weights, current_prices, investment_amount
    )

    return recommendations, p_ret[optimal_idx], p_vol[optimal_idx], total_investment

def main():
    """Prompt for an investment amount and print the optimised allocation.

    Reads the amount from standard input, runs optimize_portfolio and prints
    the per-stock recommendations followed by the portfolio-level summary.
    """
    budget = float(input("Enter the total investment amount: "))
    recommendations, expected_return, volatility, total_invested = optimize_portfolio(budget)

    print("\nInvestment Recommendations:")
    for rec in recommendations:
        # One block per stock, terminated by an empty line
        print(
            f"Stock: {rec['stock']}",
            f"  Weight: {rec['weight']:.2%}",
            f"  Shares: {rec['shares']}",
            f"  Investment: ${rec['investment']:.2f}",
            "",
            sep="\n",
        )

    sharpe = (expected_return - risk_free_rate) / volatility
    print(f"Total Invested: ${total_invested:.2f}")
    print(f"Expected Annual Return: {expected_return:.2%}")
    print(f"Expected Volatility: {volatility:.2%}")
    print(f"Sharpe Ratio: {sharpe:.2f}")

# Run the interactive flow when this code is executed as the main module.
# Inside a notebook kernel __name__ is also "__main__", so running this cell
# calls main() and waits for the amount to be typed in.
if __name__ == "__main__":
    main()

['APLAPOLLO.NS', 'MUTHOOTFIN.NS', 'ARVEE.NS', 'ICRA.NS', 'APOLLO.NS', 'NRAIL.NS', 'RKFORGE.NS', 'HMAAGRO.NS', 'SPORTKING.NS', 'JSFB.NS', 'SPAL.NS']
{'APLAPOLLO.NS': 1541.15, 'MUTHOOTFIN.NS': 1931.45, 'ARVEE.NS': 171.9, 'ICRA.NS': 7231.3, 'APOLLO.NS': 102.55, 'NRAIL.NS': 359.4, 'RKFORGE.NS': 934.55, 'HMAAGRO.NS': 44.63, 'SPORTKING.NS': 104.0, 'JSFB.NS': 469.75, 'SPAL.NS': 919.55}

Investment Recommendations:
Stock: APLAPOLLO.NS
  Weight: 14.70%
  Shares: 9
  Investment: $13870.35

Stock: MUTHOOTFIN.NS
  Weight: 17.13%
  Shares: 8
  Investment: $15451.60

Stock: ARVEE.NS
  Weight: 20.32%
  Shares: 118
  Investment: $20284.20

Stock: ICRA.NS
  Weight: 10.06%
  Shares: 1
  Investment: $7231.30

Stock: APOLLO.NS
  Weight: 9.02%
  Shares: 87
  Investment: $8921.85

Stock: NRAIL.NS
  Weight: 2.89%
  Shares: 8
  Investment: $2875.20

Stock: RKFORGE.NS
  Weight: 2.59%
  Shares: 2
  Investment: $1869.10

Stock: HMAAGRO.NS
  Weight: 0.09%
  Shares: 1
  Investment: $44.63

Stock: SPORTKING.NS
  We

In [16]:
# Spot check: fetch the current price of a single ticker through yfinance
current_prices = {}
stock = 'ESABINDIA'
stock = stock + '.NS'
ticker = yf.Ticker(stock)
try:
    price = ticker.info.get('currentPrice', np.nan)
except KeyError:
    print(f"Price data for {'ESABINDIA'} is not available.")
else:
    # Stored under the bare ticker name, without the '.NS' suffix
    current_prices['ESABINDIA'] = price
    print(current_prices)



{'ESABINDIA': 6418.55}
