# <center>Portfolio Allocation using GA</center>
___

In [1]:
import os
import pandas as pd
import numpy as np

# set the working directory to the drive where stocks directory is present
os.chdir('C:/Users/Pavan/Desktop/Batch Files/Aug 29/20200829_Batch80A_CSE7221o_GA_Portfolio Allocation_Solution.ipynb/GA_PracticeExercise/stocks/stocks/')

#### Define function to obtain .csv filenames

In [4]:
os.listdir(os.getcwd())

['.ipynb_checkpoints',
 'Genetic Algorithm - Portfolio Allocation-checkpoint.ipynb',
 'hdfc.csv',
 'itc.csv',
 'l&t.csv',
 'm&m.csv',
 'sunpha.csv',
 'tcs.csv']

In [6]:
def obtain_filenames(path):
    
    name = []
    
    # This loop will search all the files in given path and 
    # selects file names which end with .csv
    for file in os.listdir(path):
        if file.endswith('.csv'):
            name.append(file.split(sep = '.')[0]) #name with . split the string
            
    return name

scrip_names = obtain_filenames(os.getcwd())
scrip_names

['hdfc', 'itc', 'l&t', 'm&m', 'sunpha', 'tcs']

#### Define function to calculate returns for each stock

In [7]:
def calculate_returns(scrip, l_o_m):
    
    stck_name_returns = {}
    
    for stck in scrip :
        
        # Read each stock data
        stck_data = pd.read_csv(stck + '.csv', header = 0, sep = ',')
        
        # Calculate returns for each stock according to list_of_months 
        returns = []
        for i in l_o_m:
            present_close = stck_data.loc[0, 'Close']
            last_close = stck_data.loc[0 + i,'Close']
            returns.append((present_close - last_close)/last_close)
        
        stck_name_returns[stck] = returns

    return stck_name_returns

###

list_of_months, index_names = [3, 6, 12, 18, 24, 36], ['3month','6month','12month','18month','24month','36month']

stck_returns = pd.DataFrame(calculate_returns(scrip_names, list_of_months),
                            index = index_names)
stck_returns


Unnamed: 0,hdfc,itc,l&t,m&m,sunpha,tcs
3month,0.114515,0.039664,-0.030947,0.211564,0.13151,0.296599
6month,0.125163,0.011212,0.011417,0.194062,-0.017957,0.368094
12month,0.275866,-0.178478,0.129783,0.330899,0.010911,0.562537
18month,0.750581,0.104171,0.413121,0.514277,-0.109885,0.564125
24month,0.792712,0.084237,0.274461,0.255179,-0.265911,0.44833
36month,0.974847,0.266844,0.069614,0.399938,-0.358785,0.447535


#### Declare global variables

In [8]:
# Constraint : Total amount to be invested - Rs.1,00,000/-
max_allocation = 100000 # maximum budget that can be allocated

weights = [0.30, 0.20, 0.15, 0.15, 0.10, 0.10] # More weight given to most recent returns
mutation_probability = 0.5 # initial mutation probability
number_of_iterations = 500 # number of GA iterations
number_of_chrom_in_population = 500 # population size

#### Generate initial population
___

- Gene : Weightage of a particular scrip in the portfolio
- Chromosome : Sequence of the weightages of different scrips in the porfolio
- Initial Population : Collection of different portfolios randomly generated.


In [9]:
def gen_init_population(pop, num_stocks):
    
    init_population = pd.DataFrame(columns = scrip_names)
    
    # Repeat it 'pop' number of times to generate population
    for i in range(pop):
        
        # For each scrip, generate a random number between 0 and 1. 
        random = np.random.random(num_stocks)
        
        # Normalise this sequence as sum of weightage for scrips - should be equal to one.
        # This sequence is one chromosome. 
        chromosome = random/(sum(random))
        init_population.loc[i] = chromosome
        
    return init_population

gen_init_population(number_of_chrom_in_population, len(scrip_names)).head()

Unnamed: 0,hdfc,itc,l&t,m&m,sunpha,tcs
0,0.209365,0.226434,0.062627,0.215804,0.080517,0.205252
1,0.022006,0.107898,0.372664,0.113664,0.157009,0.226759
2,0.188858,0.174345,0.074357,0.198709,0.113626,0.250106
3,0.114377,0.237681,0.243159,0.111481,0.119062,0.17424
4,0.194572,0.016781,0.430171,0.081841,0.250248,0.026388


#### Define fitness function
The fitness function calculates weighted return for the portfolio 

In [10]:
def fitness_func(max_alloc, population, stock_ret, wts):
    
    # For each scrip, calculate returns weighted by 'n-month' returns
    weighted_stck_returns = stock_ret.apply(lambda x:(x * wts).sum())
    
    # Portfolio return in % considering scrip allocation weightage. 
    # For instance, for stock 1 in a particular portfolio,
    # if the weighted return was 5% and this stock forms 20% of the portfolio, 
    # then % return on this stock is (20000*0.05)/100000
    pfolio_return = population.apply(lambda x: sum((x * max_alloc)* weighted_stck_returns)/max_alloc, axis = 1)
    return pfolio_return


###

fitness_func(max_allocation, 
             gen_init_population(number_of_chrom_in_population, len(scrip_names)),
             stck_returns, 
             weights)


0      0.232510
1      0.204137
2      0.219707
3      0.264350
4      0.110724
         ...   
495    0.271425
496    0.175916
497    0.243495
498    0.135589
499    0.101835
Length: 500, dtype: float64

In [11]:
stck_returns

Unnamed: 0,hdfc,itc,l&t,m&m,sunpha,tcs
3month,0.114515,0.039664,-0.030947,0.211564,0.13151,0.296599
6month,0.125163,0.011212,0.011417,0.194062,-0.017957,0.368094
12month,0.275866,-0.178478,0.129783,0.330899,0.010911,0.562537
18month,0.750581,0.104171,0.413121,0.514277,-0.109885,0.564125
24month,0.792712,0.084237,0.274461,0.255179,-0.265911,0.44833
36month,0.974847,0.266844,0.069614,0.399938,-0.358785,0.447535


In [12]:
weights

[0.3, 0.2, 0.15, 0.15, 0.1, 0.1]

In [13]:
stck_returns.apply(lambda x:(x * weights).sum())

hdfc      0.390110
itc       0.038104
l&t       0.108842
m&m       0.294570
sunpha   -0.041454
tcs       0.421184
dtype: float64

In [14]:
stck_returns.iloc[:,0]

3month     0.114515
6month     0.125163
12month    0.275866
18month    0.750581
24month    0.792712
36month    0.974847
Name: hdfc, dtype: float64

In [15]:
(stck_returns.iloc[:,0] * weights).sum()

0.3901102751307821

#### Define selection function

In [16]:
def selection_func(fit_func, max_alloc, population, stock_ret,wts):
    
    # For each portfolio calculate portfolio returns using fitness_func
    portfolio_return = fit_func(max_alloc, population, stock_ret,wts)
    
    # Select top 20% portfolio returns as elite population
    selected_portfolio = portfolio_return.sort_values(ascending = False)[0 : int(0.2 * population.shape[0])]
    elite = population.iloc[selected_portfolio.index, : ]
    return elite

###
tmp_pop = selection_func(fitness_func, 
                         max_allocation, 
                         gen_init_population(number_of_chrom_in_population, len(scrip_names)),
                         stck_returns, weights)
tmp_pop.head()


Unnamed: 0,hdfc,itc,l&t,m&m,sunpha,tcs
36,0.374497,0.164542,0.022796,0.017052,0.013365,0.407748
445,0.318868,0.00464,0.003248,0.201399,0.125076,0.346769
357,0.291948,0.017648,0.113075,0.39291,0.009887,0.174533
359,0.298684,0.108341,0.043213,0.180664,0.042101,0.326998
374,0.325172,0.053097,0.09374,0.196651,0.051452,0.279889


#### Define evolution function

Arithmetic crossover and scramble mutation has been used - you are free to use any other crossover methods like one-point crossover, multi-point crossover or uniform crossover, and mutation methods like swap mutation and inversion mutation.

In [17]:
def evolution_func(elite_pop, mut_prob, scrip, num_of_chrom_in_pop):
    
    new_pop = pd.DataFrame(columns = scrip)
    new_pop_iter = 0
    
    while(new_pop_iter <= num_of_chrom_in_pop):
        
        # We would generate a random number and if the number is less than mutation probability, 
        # we will go for crossover
        if np.random.random() < mut_prob:
            
            # The candidate is chosen randomly for mutation
            candidate = np.random.randint(low = 0, high = elite_pop.shape[0], size = 1)
            mutant = elite_pop.iloc[candidate,:].copy()
            
            # shuffle() randomly shuffles the allocation values for each scrip in a portfolio
            np.random.shuffle(mutant)
            new_pop = new_pop.append(mutant, ignore_index = True)
            
            # We prefer to explore much more in the beginning of the search process 
            # to ensure diversity and avoid local optimum. 
            # As we progress towards the end of search process ( or iterations), 
            # we need to ensure the convergence of the population to a good solution 
            # if not the best. Hence we keep on reducing mutation probability.
            mut_prob = mut_prob/(new_pop_iter + 1)
            
        else:
            
            # The parents (elite portfolios) for crossover are chosen randomly
            parent_indices = np.random.randint(low = 0, high = elite_pop.shape[0], size = 2)
            parents = elite_pop.iloc[parent_indices,:]
            
            # The proportion of characteristic that should be captured in child from each parent is determined by a 
            # randomly generated number - alpha
            alpha = np.random.random()
            child1 = alpha * (parents.iloc[0, :]) + (1 - alpha) * (parents.iloc[1, :])
            child2 = (1 - alpha) * (parents.iloc[0,:]) + alpha * (parents.iloc[1,:])
            
            new_pop = new_pop.append(dict(child1), ignore_index = True)
            new_pop = new_pop.append(dict(child2), ignore_index = True)
        
        new_pop_iter += 1
        
    # To create a new population, we include both new population and elite population 
    # as there may be a case that we are already at a global optimum solution
    new_pop = pd.concat([new_pop, elite_pop], ignore_index = True)
    return new_pop

###

evolution_func(tmp_pop,
               mutation_probability,
               scrip_names,
               number_of_chrom_in_population).head()


Unnamed: 0,hdfc,itc,l&t,m&m,sunpha,tcs
0,0.221643,0.132454,0.142927,0.084141,0.093253,0.325581
1,0.17568,0.049461,0.308982,0.128266,0.040968,0.296643
2,0.269217,0.080753,0.101548,0.207051,0.112485,0.228947
3,0.252394,0.098368,0.125226,0.22554,0.110431,0.188042
4,0.347764,0.009898,0.259379,0.133726,0.138159,0.111075


#### Define main function

In [18]:
def main_func():
    
    initial_population = gen_init_population(number_of_chrom_in_population,
                                             len(scrip_names))
    # initial_population.head()
    
    elite_population = selection_func(fitness_func,
                                      max_allocation,
                                      initial_population,
                                      stck_returns,
                                      weights)
    elite_population.reset_index(inplace = True, drop = True)
    # elite_population.head()
    
    new_population = evolution_func(elite_population,
                                    mutation_probability,
                                    scrip_names,
                                    number_of_chrom_in_population)

    # We run genetic algorithm for number_of_iterations times
    for i in range(number_of_iterations):
        
        new_elite_population = selection_func(fitness_func,
                                              max_allocation,
                                              new_population,
                                              stck_returns,weights)
        new_population = evolution_func(new_elite_population,
                                        mutation_probability,
                                        scrip_names,
                                        number_of_chrom_in_population)
        
    return new_population

#### Call main function

In [19]:
range_of_portfolios = main_func() 

#### Select the topmost portfolio

In [20]:
top_portfolios = selection_func(fitness_func, 
                                max_allocation, 
                                range_of_portfolios, 
                                stck_returns, 
                                weights)

final_portfolio = top_portfolios.iloc[0]
final_portfolio

hdfc      0.336568
itc       0.088856
l&t       0.056169
m&m       0.217941
sunpha    0.005639
tcs       0.294827
Name: 1246, dtype: float64

#### Calculate portfolio returns

In [21]:
weighted_stck_returns = stck_returns.apply(lambda x:(x * weights).sum())
weighted_stck_returns

hdfc      0.390110
itc       0.038104
l&t       0.108842
m&m       0.294570
sunpha   -0.041454
tcs       0.421184
dtype: float64

In [22]:
portfolio_return = sum((final_portfolio * max_allocation) * weighted_stck_returns)/max_allocation

print('The portfolio return is: {0:.3f}'.format(portfolio_return))

The portfolio return is: 0.329
