# Analysis of MF Data from AMFI and Portfolio Development

This notebook analyzes all mutual funds (MFs) listed with AMFI (Association of Mutual Funds in India) to provide insights into the volatility and historical performance of each fund, and uses those insights to recommend investments.

In [1]:
# Import necessary libraries
import pandas as pd
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

# Initialize environment
# Configuration is read from environment variables (optionally supplied via a
# .env file picked up by load_dotenv) so no paths or credentials are hardcoded.
load_dotenv()
amfi_data_batchA = os.getenv('amfi_data_batchA')
amfi_data_batchB = os.getenv('amfi_data_batchB')
railway_db_url = os.getenv('railway_db_url')

# Fail early with a readable message: os.getenv returns None for an unset
# variable, and passing None to create_engine produces a far less obvious error.
if not railway_db_url:
    raise RuntimeError(
        "Environment variable 'railway_db_url' is not set; "
        "define it in the environment or in the .env file before running this notebook."
    )
engine = create_engine(railway_db_url, connect_args={'options': '-c search_path="FINANCIAL_ANALYSIS"'})

# Select execution option
# Surrounding whitespace is stripped so that an answer such as '1 ' still
# matches the exact string comparisons used by the cells below.
option = input('Select program to run: 1-Data_Load, 2-Funds_Analysis: ').strip()

## Data load program - to be executed only once, at the beginning

In [None]:
# Data load program
# Reads the two AMFI CSV batches, normalises their dates, appends the combined
# rows to the `amfi_database` table and reads the table back to report a count.
# Runs only when option '1' was selected in the first cell.

CHUNK_ROWS = 1000000       # rows written between two commits
INSERT_BATCH_ROWS = 5000   # rows bundled into one multi-row INSERT statement


def _load_batch(csv_path, batch_label):
    """Read one CSV batch, parse its 'date' column and drop unparseable rows.

    Args:
        csv_path: Path (or URL) of the CSV file, passed straight to pd.read_csv.
        batch_label: Short label ('A', 'B', ...) used only in progress messages.

    Returns:
        DataFrame containing only the rows whose 'date' value is not NaT after
        parsing.
    """
    df_batch = pd.read_csv(csv_path)
    # `infer_datetime_format` is deprecated (pandas >= 2.0 infers a strict
    # format by default), so it is no longer passed.
    # NOTE(review): values that do not match the inferred format are coerced to
    # NaT and dropped below; if the file mixes date formats this discards valid
    # rows - confirm against the raw data.
    df_batch['date'] = pd.to_datetime(df_batch['date'], errors='coerce')
    print(f'Data for top 5 rows from batch {batch_label}: \n{df_batch.head(5)}')

    unparsed_count = int(df_batch['date'].isna().sum())
    print(f'Number of records with dates in string and not updated by Pandas in batch {batch_label}: {unparsed_count}')

    df_batch = df_batch.dropna(subset=['date'])
    # The length is computed outside the f-string: reusing the same quote type
    # inside a replacement field is a SyntaxError before Python 3.12.
    batch_rows = len(df_batch['SNo.'])
    print(f'Number of records in batch {batch_label}: {batch_rows}')
    return df_batch


if option == '1':
    try:
        # Load data from both batches into dataframes and correct date format
        df_batchA = _load_batch(amfi_data_batchA, 'A')
        df_batchB = _load_batch(amfi_data_batchB, 'B')

        # Combine batch A and B data and rename the columns that are renamed
        # before the frame is written to the table
        df_combined = pd.concat([df_batchA, df_batchB], ignore_index=True)
        df_combined = df_combined.rename(columns={'date': 'trx_date', 'SNo.': 's_no'})
        print(f'Data for top 5 rows from consolidated dataframe: \n{df_combined.head(5)}')
        combined_rows = len(df_combined['s_no'])
        print(f'Number of records in consolidated data: {combined_rows}')

        # Populate data into database, committing after every CHUNK_ROWS rows
        with engine.connect() as database_connection:
            for records_start in range(0, len(df_combined), CHUNK_ROWS):
                df_chunks = df_combined.iloc[records_start:records_start + CHUNK_ROWS]
                df_chunks.to_sql(
                    'amfi_database',
                    con=database_connection,
                    schema='FINANCIAL_ANALYSIS',
                    if_exists='append',
                    index=False,
                    method='multi',
                    # Without chunksize, method='multi' builds a single INSERT
                    # for the whole slice; batching keeps each statement small.
                    chunksize=INSERT_BATCH_ROWS
                    )
                database_connection.commit()
                print(f'{len(df_chunks)} Committed.')

        # Read the table back to report how many records it now holds
        query = text('select * from amfi_database;')
        with engine.connect() as database_connection:
            df = pd.read_sql(sql=query, con=database_connection, index_col='trx_id')

        processed_records = len(df['s_no'])
        print(f'Successfully entered {processed_records} into database.')
    except Exception as e:
        print(f'Error: {e}')
elif option == '2':
    # Only announce option 2 when it was actually chosen; any other value is
    # not reported as "option 2" here.
    print('Selected option 2. Proceeding to execute funds analysis program.')

  df_batchA = pd.read_csv(amfi_data_batchA)
  df_batchA['date'] = pd.to_datetime(df_batchA['date'], infer_datetime_format= True, errors = 'coerce')
  df_batchA['date'] = pd.to_datetime(df_batchA['date'], infer_datetime_format= True, errors = 'coerce')


Data for top 5 rows from batch A: 
   SNo.       date      nav                       fund_name  scheme_code  \
0     0 2008-05-29  10.7205  Standard Chartered Mutual Fund       100027   
1     1 2008-05-28  10.7250  Standard Chartered Mutual Fund       100027   
2     2 2008-05-27  10.7216  Standard Chartered Mutual Fund       100027   
3     3 2008-05-26  10.7206  Standard Chartered Mutual Fund       100027   
4     4 2008-05-23  10.7152  Standard Chartered Mutual Fund       100027   

                                         scheme_name trading_symbol_growth  \
0  Grindlays Super Saver Income Fund-GSSIF-Half Y...                   NaN   
1  Grindlays Super Saver Income Fund-GSSIF-Half Y...                   NaN   
2  Grindlays Super Saver Income Fund-GSSIF-Half Y...                   NaN   
3  Grindlays Super Saver Income Fund-GSSIF-Half Y...                   NaN   
4  Grindlays Super Saver Income Fund-GSSIF-Half Y...                   NaN   

  trading_symbol_reinvestment  
0      

  df_batchB = pd.read_csv(amfi_data_batchB)
  df_batchB['date'] = pd.to_datetime(df_batchB['date'], infer_datetime_format= True, errors='coerce')


Data for top 5 rows from batch B: 
   SNo.       date      nav                     fund_name  scheme_code  \
0     0 2016-02-11  13.0499  ICICI Prudential Mutual Fund       123991   
1     1 2014-12-06  10.8287  ICICI Prudential Mutual Fund       123991   
2     2        NaT  13.1157  ICICI Prudential Mutual Fund       123992   
3     3 2014-12-06  10.8157  ICICI Prudential Mutual Fund       123992   
4     4        NaT  11.8538  ICICI Prudential Mutual Fund       123993   

                                         scheme_name trading_symbol_growth  \
0  ICICI Prudential Fixed Maturity Plan-Series 70...                   NaN   
1  ICICI Prudential Fixed Maturity Plan-Series 70...                   NaN   
2  ICICI Prudential Fixed Maturity Plan-Series 69...                   NaN   
3  ICICI Prudential Fixed Maturity Plan-Series 69...                   NaN   
4  ICICI Prudential Fixed Maturity Plan-Series 69...                   NaN   

  trading_symbol_reinvestment  
0                  

KeyboardInterrupt: 

In [None]:
# Funds analysis program
# Runs only when option '2' was selected. Option '1' is a valid choice handled
# by the data load cell, so it must not be reported as invalid here.
if option == '2':
    print('Inside Funds Analysis Program')
elif option != '1':
    print(f'Invalid option {option} selected. Please enter either 1 or 2 as input.')