In [1]:
import os
import pandas as pd
import numpy as np

In [12]:
# change directory if needed 
# os.chdir('')

### Calculate stock returns from closing value of each stock for every month

##### Define function to obtain .csv filenames

In [2]:
def obtain_filenames(path):
    """Return the base names of the ``.csv`` files found directly in ``path``.

    Parameters
    ----------
    path : str
        Directory to scan (sub-directories are not searched).

    Returns
    -------
    list of str
        File names with only the trailing ``.csv`` extension removed,
        sorted alphabetically so the result does not depend on the
        arbitrary order in which ``os.listdir`` yields entries.
    """
    # os.path.splitext strips only the last extension, so a name with extra
    # dots ('a.b.csv') is kept intact ('a.b') instead of being cut at the
    # first dot.
    name = [os.path.splitext(file)[0]
            for file in os.listdir(path)
            if file.endswith('.csv')]
    return sorted(name)

##### Define function to calculate returns for each stock

In [3]:
def calculate_returns(scrip, l_o_m, path='.'):
    """Compute look-back returns for every stock from its ``<name>.csv`` file.

    For each offset ``i`` in ``l_o_m`` the return is
    ``(Close[0] - Close[i]) / Close[i]``, i.e. row 0 is treated as the
    present close and row ``i`` as the earlier close.
    NOTE(review): this presumes the rows are ordered newest-first with one
    row per month -- confirm against the data files.

    Parameters
    ----------
    scrip : iterable of str
        Stock names; each one must have a matching ``<name>.csv`` file with
        a ``Close`` column.
    l_o_m : iterable of int
        Row offsets (look-back periods) to compute returns for.
    path : str, optional
        Directory holding the CSV files. Defaults to the current working
        directory, which is where the files were always read from before
        this parameter existed.

    Returns
    -------
    dict
        Maps each stock name to the list of its returns, in ``l_o_m`` order.
    """
    stck_name_returns = {}
    for stck in scrip:
        # Read each stock's data from the requested directory
        stck_data = pd.read_csv(os.path.join(path, stck + '.csv'), header=0, sep=',')
        close = stck_data['Close']
        # The present close does not depend on the look-back period, so read it once
        present_close = close.loc[0]
        stck_name_returns[stck] = [(present_close - close.loc[i]) / close.loc[i]
                                   for i in l_o_m]
    return stck_name_returns

In [4]:
# Every .csv file in the current working directory is treated as one stock (scrip)
scrip_names = obtain_filenames(path=os.getcwd())
scrip_names

['hdfc', 'itc', 'l&t', 'm&m', 'sunpha']

In [5]:
# Look-back periods passed to calculate_returns as row offsets into each stock's price history
list_of_months = [
    3, 6, 12,
    18, 24, 36,
]

In [6]:
# Row labels are derived from list_of_months so they can never drift out of sync with the
# look-back periods that were actually computed (they were previously hard-coded separately)
stck_returns = pd.DataFrame(calculate_returns(scrip_names,list_of_months),
                            index=['{}month'.format(months) for months in list_of_months])
stck_returns

Unnamed: 0,hdfc,itc,l&t,m&m,sunpha
3month,0.114515,0.039664,-0.030947,0.211564,0.13151
6month,0.125163,0.011212,0.011417,0.194062,-0.017957
12month,0.275866,-0.178478,0.129783,0.330899,0.010911
18month,0.750581,0.104171,0.413121,0.514277,-0.109885
24month,0.792712,0.084237,0.274461,0.255179,-0.265911
36month,0.974847,0.266844,0.069614,0.399938,-0.358785


### Genetic Algorithm for portfolio allocation

In [7]:
# Gene for this problem would be the percentage of amount to be allocated for a particular stock
# Chromosome would be the percentage of amount to be allocated for each stock
# The constraint is total amount to be invested is Rs.1,00,000/-
# Weighted returns for each stock are calculated according to the weights list, with more weight given to the most recent returns

##### Declare global variables

In [8]:
max_allocation = 100000 # maximum budget that can be allocated
weights = [0.30,0.20,0.15,0.15,0.10,0.10] # weights for n-month returns, one per look-back period (they sum to 1)
mutation_probability = 0.5 # initial mutation probability
number_of_iterations = 500 # total number of GA iterations
number_of_chrom_in_population = 500 # size of population
random_seed = 42 # seed for numpy's global random generator
# The GA draws all of its random numbers from np.random; seeding here makes a
# Restart & Run All reproduce the same populations and the same final portfolio
np.random.seed(random_seed)

##### Generate initial population
In this context, a gene is the weightage of a particular scrip in the portfolio, and a chromosome is the sequence of weightages of the different scrips in the portfolio. The initial population is a collection of randomly generated portfolios.

In [9]:
def gen_init_population(pop, num_stocks, scrip=None):
    """Generate a random starting population of portfolios.

    Each row (chromosome) holds ``num_stocks`` random weights that are
    normalised so that the row sums to one.

    Parameters
    ----------
    pop : int
        Number of chromosomes (rows) to generate.
    num_stocks : int
        Number of genes (columns) per chromosome.
    scrip : sequence of str, optional
        Column labels. Defaults to the notebook-level ``scrip_names`` list,
        which is what was always used before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        ``pop`` rows by ``num_stocks`` columns of float weights, indexed
        0..pop-1.
    """
    if scrip is None:
        scrip = scrip_names
    # Draw the whole population in one call and build the frame once; adding
    # rows one at a time re-allocates the frame on every insertion.
    random_draws = np.random.random((pop, num_stocks))
    chromosomes = random_draws / random_draws.sum(axis=1, keepdims=True)
    return pd.DataFrame(chromosomes, columns=list(scrip))

In [11]:
# gen_init_population(number_of_chrom_in_population,len(scrip_names)).head()
# gen_init_population(number_of_chrom_in_population,len(scrip_names)).shape

##### Define fitness function
The fitness function calculates the weighted return of each portfolio in the population.

In [12]:
def fitness_func(max_alloc,population,stock_ret,wts):
    """Return the weighted fractional return of every portfolio in ``population``.

    Parameters
    ----------
    max_alloc : number
        Total budget. Kept for interface compatibility only: the return is a
        fraction of the budget, so the amount cancels out
        (``sum(w * max_alloc * r) / max_alloc == sum(w * r)``) and is no
        longer used in the computation.
    population : pandas.DataFrame
        One portfolio per row, one stock weight per column.
    stock_ret : pandas.DataFrame
        One column per stock, one row per look-back period.
    wts : sequence of float
        One weight per row of ``stock_ret``.

    Returns
    -------
    pandas.Series
        Portfolio return for each row of ``population``, indexed like
        ``population`` (an empty Series for an empty population).
    """
    # For each scrip, combine its n-month returns into a single weighted return
    weighted_stck_returns = stock_ret.mul(wts, axis=0).sum(axis=0)
    # Weight each scrip's return by its share of the portfolio and add up across scrips.
    # This is done for the whole population at once instead of row by row, because this
    # function is evaluated for every portfolio in every generation.
    # skipna=False keeps a missing stock visible as NaN rather than silently dropping it.
    pfolio_return = (population.astype(float)
                     .mul(weighted_stck_returns, axis=1)
                     .sum(axis=1, skipna=False))
    return pfolio_return

##### Define selection function

In [13]:
def selection_func(fit_func,max_alloc,population,stock_ret,wts):
    """Select the fittest 20% of ``population`` as the elite.

    Parameters
    ----------
    fit_func : callable
        Called as ``fit_func(max_alloc, population, stock_ret, wts)``; must
        return a Series of fitness values indexed like ``population``.
    max_alloc, stock_ret, wts
        Passed through to ``fit_func`` unchanged.
    population : pandas.DataFrame
        One portfolio per row.

    Returns
    -------
    pandas.DataFrame
        The top ``int(0.2 * len(population))`` rows ordered from fittest to
        least fit, keeping their original index labels. A population of
        fewer than five rows therefore yields an empty elite.
    """
    # For each portfolio calculate portfolio returns using the fitness function
    portfolio_return = fit_func(max_alloc,population,stock_ret,wts)
    # Select the top 20% of portfolio returns as the elite population
    num_elite = int(0.2*population.shape[0])
    selected_portfolio = portfolio_return.sort_values(ascending=False).iloc[:num_elite]
    # The sorted Series carries index *labels*, so the rows must be looked up by label (.loc).
    # A positional lookup only works while the labels happen to be 0..n-1 and otherwise
    # picks the wrong rows or raises IndexError.
    elite = population.loc[selected_portfolio.index,:]
    return elite

In [14]:
# Trial run: draw a random starting population and keep only its elite
tmp_pop = selection_func(
    fit_func=fitness_func,
    max_alloc=max_allocation,
    population=gen_init_population(pop=number_of_chrom_in_population, num_stocks=len(scrip_names)),
    stock_ret=stck_returns,
    wts=weights,
)
tmp_pop.head()

Unnamed: 0,hdfc,itc,l&t,m&m,sunpha
413,0.412787,0.106053,0.012495,0.465113,0.003552
271,0.621765,0.046832,0.098409,0.145956,0.087037
15,0.360083,0.031645,0.194567,0.403442,0.010264
167,0.510349,0.134152,0.008272,0.252304,0.094923
1,0.439657,0.089649,0.079929,0.295431,0.095334


##### Define evolution function
Arithmetic crossover and scramble mutation have been used here - you are free to use any other crossover method, such as
one-point, multi-point or uniform crossover, and any other mutation method, such as swap mutation or inversion
mutation.

In [15]:
def evolution_func(elite_pop,mut_prob,scrip,num_of_chrom_in_pop):
    """Breed a new population from the elite portfolios.

    Runs ``num_of_chrom_in_pop`` breeding steps. Each step is either a
    scramble mutation (one offspring) or an arithmetic crossover (two
    offspring), so between ``num_of_chrom_in_pop`` and
    ``2 * num_of_chrom_in_pop`` offspring are produced. The elite rows are
    appended after the offspring.

    Parameters
    ----------
    elite_pop : pandas.DataFrame
        Elite portfolios, one per row. Must contain at least one row.
    mut_prob : float
        Initial probability of choosing mutation over crossover. It decays
        inside this call only; the caller's value is not modified.
    scrip : sequence of str
        Column labels of the offspring.
    num_of_chrom_in_pop : int
        Number of breeding steps to run.

    Returns
    -------
    pandas.DataFrame
        Offspring followed by the elite rows, re-indexed from 0.

    Raises
    ------
    ValueError
        If ``elite_pop`` has no rows (there is nothing to breed from).
    """
    num_elite = elite_pop.shape[0]
    if num_elite == 0:
        raise ValueError('elite_pop must contain at least one portfolio')
    elite_values = np.asarray(elite_pop, dtype=float)
    # Offspring are collected in a list and turned into a frame once at the end;
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
    offspring = []
    # range() gives exactly num_of_chrom_in_pop steps (a `<=` test on a zero-based counter runs one extra)
    for new_pop_iter in range(num_of_chrom_in_pop):
        # Generate a random number; if it is below the mutation probability we mutate, otherwise we cross over.
        # The mutation probability starts high to explore the search space and avoid getting stuck in a local optimum.
        if np.random.random() < mut_prob:
            # The candidate (one of the elite portfolios) is chosen randomly for mutation
            candidate = np.random.randint(low=0,high=num_elite)
            # Scramble mutation: permute the allocation values across the scrips. The permutation must be
            # applied to the row's values - np.random.shuffle on a one-row DataFrame only reorders rows,
            # which leaves the allocations untouched.
            offspring.append(np.random.permutation(elite_values[candidate]))
            # Explore more at the beginning and converge later: the mutation probability is reduced
            # after every mutation within this call
            mut_prob = mut_prob/(new_pop_iter+1)
        else:
            # The parents (elite portfolios) for crossover are chosen randomly
            parent_indices = np.random.randint(low=0,high=num_elite,size=2)
            first_parent = elite_values[parent_indices[0]]
            second_parent = elite_values[parent_indices[1]]
            # alpha is the proportion of each parent's characteristics captured in the children
            alpha = np.random.random()
            offspring.append(alpha*first_parent + (1-alpha)*second_parent)
            offspring.append((1-alpha)*first_parent + alpha*second_parent)
    if not offspring:
        return elite_pop.reset_index(drop=True)
    new_pop = pd.DataFrame(offspring,columns=elite_pop.columns).reindex(columns=scrip)
    # The elite population is kept alongside the offspring because it may already contain the best solution
    new_pop = pd.concat([new_pop,elite_pop],ignore_index=True)
    return new_pop

In [16]:
# Preview a single evolution step applied to the tmp_pop elite sample
evolution_func(
    elite_pop=tmp_pop,
    mut_prob=mutation_probability,
    scrip=scrip_names,
    num_of_chrom_in_pop=number_of_chrom_in_population,
).head()

Unnamed: 0,hdfc,itc,l&t,m&m,sunpha
0,0.177123,0.171751,0.037726,0.47657,0.136832
1,0.598833,0.053275,0.095279,0.163007,0.089605
2,0.356533,0.100584,0.141733,0.32128,0.079871
3,0.327598,0.119296,0.14225,0.375477,0.035379
4,0.354172,0.093064,0.337143,0.157754,0.057868


##### Define main function

In [17]:
def main_func():
    """Run the genetic algorithm and return the population of the last generation.

    Reads its configuration from the notebook-level variables
    (number_of_chrom_in_population, scrip_names, max_allocation, stck_returns,
    weights, mutation_probability, number_of_iterations). One
    selection/evolution step is applied to the random starting population,
    followed by number_of_iterations further steps. Every step is started with
    the same mutation_probability.
    """
    starting_pop = gen_init_population(number_of_chrom_in_population,len(scrip_names))
    first_elite = selection_func(fitness_func,max_allocation,starting_pop,stck_returns,weights)
    # Renumber the first elite from 0 before it is handed to the evolution step
    first_elite = first_elite.reset_index(drop=True)
    population = evolution_func(first_elite,mutation_probability,scrip_names,number_of_chrom_in_population)
    generation = 0
    while generation < number_of_iterations:
        # Keep the fittest portfolios of the current generation and breed the next one from them
        elite = selection_func(fitness_func,max_allocation,population,stck_returns,weights)
        population = evolution_func(elite,mutation_probability,scrip_names,number_of_chrom_in_population)
        generation += 1
    return population

##### Call main function

In [18]:
# Run the complete genetic algorithm; the result is the population returned by main_func after its last generation
range_of_portfolios = main_func() #Final population

##### Select the topmost portfolio

In [19]:
# Rank the final population once more; the elite is sorted by descending fitness, so its first row is the fittest portfolio
top_portfolios = selection_func(
    fit_func=fitness_func,
    max_alloc=max_allocation,
    population=range_of_portfolios,
    stock_ret=stck_returns,
    wts=weights,
)
final_portfolio = top_portfolios.iloc[0]
final_portfolio

hdfc      0.446721
itc       0.005231
l&t       0.148396
m&m       0.338281
sunpha    0.061371
Name: 1246, dtype: float64

##### Calculate portfolio returns

In [20]:
# Collapse each stock's n-month returns into one figure using the look-back weights
weighted_stck_returns = stck_returns.apply(lambda column: column.mul(weights).sum())
weighted_stck_returns

hdfc      0.390110
itc       0.038104
l&t       0.108842
m&m       0.294570
sunpha   -0.041454
dtype: float64

In [21]:
# Return of the selected portfolio as a fraction of the budget: each stock's share of max_allocation
# times its weighted return, summed over the stocks and divided by max_allocation again
portfolio_return = sum((final_portfolio*max_allocation)*weighted_stck_returns)/max_allocation
# In the past three years, Nifty 50 has given a return of 27.4%. Using only one stock from each sector
# (all of them are top players by market capitalization, except m&m), an earlier run of this notebook gave a
# portfolio return of 27.1%. The genetic algorithm is stochastic, so the figure printed below differs from run to run.
# This excludes the expense ratio that you would have incurred for a Nifty 50 index fund (roughly 0.5%).
# If you do some research, you will find this beats some mutual funds in this category as well!
print('The portfolio return is: {0:.3f}'.format(portfolio_return))

The portfolio return is: 0.288
