# Parag Parikh Flexi Cap Fund Performance Analysis

This notebook analyzes the performance of Parag Parikh Flexi Cap Fund using our analytical NAV dataset.

**Dataset:** `data/processed/analytical/nav_daily_data.parquet`  
**Date Range:** 2006-2018 (from available data)  
**Records:** 9.2M NAV observations across 4,481 schemes

In [None]:
# Import required libraries
import json
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Silence library warnings so the cell outputs stay readable
warnings.filterwarnings('ignore')

# Plot appearance: stock matplotlib style, seaborn "husl" palette, 12x8 figures
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Timestamp this run so saved outputs can be traced back to when they were produced
run_stamp = datetime.now().strftime('%Y-%m-%d %H:%M')
print("📊 Libraries imported successfully")
print(f"📅 Analysis date: {run_stamp}")

## 1. Load and Explore Dataset

In [None]:
# Read the analytical NAV parquet file and report its basic dimensions
print("📂 Loading analytical NAV dataset...")
df = pd.read_parquet('../data/processed/analytical/nav_daily_data.parquet')

print("✅ Dataset loaded successfully")
print(f"📊 Shape: {df.shape}")

# Earliest / latest observation date across all schemes
first_obs, last_obs = df['date'].min(), df['date'].max()
print(f"📅 Date range: {first_obs.date()} to {last_obs.date()}")

# Distinct scheme codes and AMC names present in the data
scheme_count = df['scheme_code'].nunique()
print(f"🏢 Unique schemes: {scheme_count:,}")
amc_count = df['amc_name'].nunique()
print(f"🏭 Unique AMCs: {amc_count:,}")

In [None]:
# Display dataset info
print("📋 Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# The bare last expression lets the notebook render the frame as a rich table
print("\n📊 Sample data:")
df.head()

## 2. Find Parag Parikh Flexi Cap Fund

In [None]:
# Look for Parag Parikh schemes: by AMC name first, then by scheme name, and
# finally fall back to listing a handful of generic "Flexi Cap" schemes.
print("🔍 Searching for Parag Parikh schemes...")

# AMC names containing "Parag Parikh" (case-insensitive; missing names never match)
amc_hits = df['amc_name'].str.contains('Parag Parikh', case=False, na=False)
parag_parikh_amc = df.loc[amc_hits, 'amc_name'].unique()
print(f"\n🏭 Parag Parikh AMC found: {parag_parikh_amc}")

if len(parag_parikh_amc) == 0:
    print("❌ No Parag Parikh AMC found. Let's search by scheme name...")

    # Scheme names matching the pattern "Parag Parikh ... Flexi"
    name_hits = df['scheme_name'].str.contains('Parag Parikh.*Flexi', case=False, na=False)
    flexi_schemes = df.loc[
        name_hits, ['scheme_code', 'scheme_name', 'amc_name', 'scheme_category']
    ].drop_duplicates()

    if len(flexi_schemes) == 0:
        # Last resort: show up to ten schemes whose name matches "Flexi ... Cap"
        print("\n🔍 Let's search more broadly for 'Flexi Cap' schemes...")
        broad_hits = df['scheme_name'].str.contains('Flexi.*Cap', case=False, na=False)
        all_flexi = (
            df.loc[broad_hits, ['scheme_code', 'scheme_name', 'amc_name']]
            .drop_duplicates()
            .head(10)
        )
        print("\n📊 Sample Flexi Cap schemes found:")
        for scheme in all_flexi.itertuples(index=False):
            print(f"   {scheme.scheme_code}: {scheme.scheme_name} (AMC: {scheme.amc_name})")
    else:
        print(f"\n📊 Found {len(flexi_schemes)} Parag Parikh Flexi schemes by name:")
        for scheme in flexi_schemes.itertuples(index=False):
            print(f"   {scheme.scheme_code}: {scheme.scheme_name} (AMC: {scheme.amc_name})")
else:
    # One row per distinct (code, name, category) offered by the matched AMC(s)
    from_matched_amc = df['amc_name'].isin(parag_parikh_amc)
    pp_schemes = df.loc[
        from_matched_amc, ['scheme_code', 'scheme_name', 'scheme_category']
    ].drop_duplicates()
    print(f"\n📊 Found {len(pp_schemes)} Parag Parikh schemes:")
    for scheme in pp_schemes.itertuples(index=False):
        print(f"   {scheme.scheme_code}: {scheme.scheme_name} ({scheme.scheme_category})")

In [None]:
# Report the dataset's time span, then look for any scheme name mentioning "Parikh"
print("📅 Dataset date range analysis:")
span_start, span_end = df['date'].min(), df['date'].max()
print(f"   Start date: {span_start.date()}")
print(f"   End date: {span_end.date()}")
print(f"   Total years: {(span_end - span_start).days / 365.25:.1f}")

# Distinct (scheme name, AMC) pairs whose scheme name contains "Parikh", case-insensitively
parikh_hits = df['scheme_name'].str.contains('Parikh', case=False, na=False)
parikh_schemes = df.loc[parikh_hits, ['scheme_name', 'amc_name']].drop_duplicates()

if len(parikh_schemes) == 0:
    print("\n⚠️ No schemes found with 'Parikh' in the name during this period")
    print("💡 Parag Parikh Long Term Equity Fund was launched in 2013, but our dataset ends in 2018")

    # The following section analyses a stand-in fund instead
    print("\n🔄 Let's analyze a representative Flexi Cap fund instead...")
else:
    print("\n🎯 Found schemes with 'Parikh' in name:")
    for name, amc in zip(parikh_schemes['scheme_name'], parikh_schemes['amc_name']):
        print(f"   {name} (AMC: {amc})")

## 3. Alternative Analysis: Representative Flexi Cap Fund

Since Parag Parikh Flexi Cap might not be in our 2006-2018 dataset, let's analyze a representative flexi/multi cap fund from the available data instead. The fund is chosen by data coverage (the scheme with the most NAV observations), not by performance.

In [None]:
def _select_best_covered_fund(candidates, heading):
    """Choose the scheme with the most NAV rows among ``candidates`` and print it.

    Args:
        candidates: DataFrame with at least ``scheme_code`` and ``scheme_name`` columns.
        heading: Line printed (after a blank line) above the selected fund's details.

    Returns:
        Tuple ``(coverage, best)``: the per-scheme row counts sorted by
        ``data_points`` descending, and the first row of that table.

    Raises:
        ValueError: If ``candidates`` contains no schemes to choose from.
    """
    coverage = (
        candidates.groupby(['scheme_code', 'scheme_name'])
        .size()
        .reset_index(name='data_points')
        .sort_values('data_points', ascending=False)
    )
    if coverage.empty:
        # Fail with an explicit message instead of an opaque IndexError from .iloc[0]
        raise ValueError("No candidate funds available to select from")

    best = coverage.iloc[0]
    print(f"\n{heading}")
    print(f"   Scheme Code: {best['scheme_code']}")
    print(f"   Scheme Name: {best['scheme_name']}")
    print(f"   Data Points: {best['data_points']:,}")
    return coverage, best


# Find flexi cap funds in our dataset: category or name mentions Flexi / Multi (Cap).
# Boolean indexing already returns a new frame, so no extra .copy() is needed.
flexi_funds = df[
    df['scheme_category'].str.contains('Flexi|Multi', case=False, na=False) |
    df['scheme_name'].str.contains('Flexi|Multi Cap', case=False, na=False)
]

if len(flexi_funds) == 0:
    # Broader search: equity-category funds with a diversified-style name
    flexi_funds = df[
        df['scheme_category'].str.contains('Equity', case=False, na=False) &
        (df['scheme_name'].str.contains('Large.*Mid|Multi|Flexi|Diversified', case=False, na=False))
    ]

print(f"📊 Found {flexi_funds['scheme_code'].nunique()} potential flexi/multi cap funds")

if len(flexi_funds) > 0:
    # Show sample funds
    sample_funds = flexi_funds[['scheme_code', 'scheme_name', 'amc_name', 'scheme_category']].drop_duplicates().head(10)
    print("\n🎯 Sample Flexi/Multi Cap funds available:")
    for idx, row in sample_funds.iterrows():
        print(f"   {row['scheme_code']}: {row['scheme_name']}")
        print(f"      AMC: {row['amc_name']}, Category: {row['scheme_category']}")

    candidate_funds = flexi_funds
    selection_heading = "🎯 Selected fund for analysis:"
else:
    print("⚠️ No suitable flexi cap funds found. Let's pick a top equity fund for demonstration.")
    # Fall back to any equity-category fund
    equity_funds = df[df['scheme_category'].str.contains('Equity', case=False, na=False)]

    candidate_funds = equity_funds
    selection_heading = "🎯 Selected equity fund for analysis:"

# The selected fund is the candidate with the most NAV rows (best data
# coverage); performance plays no part in the choice.
fund_coverage, selected_fund = _select_best_covered_fund(candidate_funds, selection_heading)
selected_scheme_code = selected_fund['scheme_code']
selected_scheme_name = selected_fund['scheme_name']

## 4. Fund Performance Analysis

In [None]:
# Filter data for the selected fund and order it chronologically; the later
# cells rely on the first/last rows being the earliest/latest observations.
fund_data = df[df['scheme_code'] == selected_scheme_code].copy().sort_values('date')

print(f"📊 Analysis for: {selected_scheme_name}")
print(f"📈 Total observations: {len(fund_data):,}")
print(f"📅 Date range: {fund_data['date'].min().date()} to {fund_data['date'].max().date()}")
print(f"💰 NAV range: ₹{fund_data['nav'].min():.2f} to ₹{fund_data['nav'].max():.2f}")

# Get fund details from the earliest row (descriptive fields are read once from it)
fund_info = fund_data.iloc[0]
print(f"\n🏭 AMC: {fund_info['amc_name']}")
print(f"📊 Category: {fund_info['scheme_category']}")

launch_date = fund_info['launch_date']
launch_text = launch_date.date() if pd.notna(launch_date) else 'N/A'
print(f"🚀 Launch Date: {launch_text}")

# Keep the label when the value is missing, so the output never shows a bare "N/A"
minimum_amount = fund_info['minimum_amount']
minimum_text = f"₹{minimum_amount:,.0f}" if pd.notna(minimum_amount) else "N/A"
print(f"💵 Minimum Amount: {minimum_text}")

In [None]:
# Calculate performance metrics
print("📈 Calculating Performance Metrics...\n")

# Basic returns: first vs. last NAV of the chronologically sorted series
start_nav = fund_data['nav'].iloc[0]
end_nav = fund_data['nav'].iloc[-1]
total_return = (end_nav / start_nav - 1) * 100
days_invested = (fund_data['date'].iloc[-1] - fund_data['date'].iloc[0]).days
if days_invested <= 0:
    # Annualising needs a positive time span; otherwise 1 / years_invested divides by zero
    raise ValueError("Need NAV observations on at least two distinct dates to annualise returns")
years_invested = days_invested / 365.25
cagr = (end_nav / start_nav) ** (1 / years_invested) - 1

print(f"💹 Total Return: {total_return:.2f}%")
print(f"📊 CAGR: {cagr * 100:.2f}%")
print(f"⏱️ Investment Period: {years_invested:.1f} years")

# Risk metrics
# Drop missing AND non-finite returns: a single inf would turn the standard
# deviation into NaN and silently zero the Sharpe ratio below.
daily_returns = fund_data['nav_change_pct'].dropna()
daily_returns = daily_returns[np.isfinite(daily_returns)]
# NOTE(review): the formulas below treat nav_change_pct as a fractional return
# (0.01 == 1%). If the column is stored in percent units, volatility is
# overstated 100x and the Sharpe ratio is distorted accordingly — confirm
# against the dataset build.
volatility = daily_returns.std() * np.sqrt(252)  # Annualized with 252 trading days
max_drawdown = (fund_data['nav'] / fund_data['nav'].cummax() - 1).min()

print(f"\n📊 Risk Metrics:")
print(f"📈 Daily Return Std: {daily_returns.std():.4f}")
print(f"🎯 Annualized Volatility: {volatility * 100:.2f}%")
print(f"📉 Max Drawdown: {max_drawdown * 100:.2f}%")

# Risk-adjusted returns
risk_free_rate = 0.06  # Assume 6% risk-free rate
# Excess CAGR per unit of annualized volatility; reported as 0 when volatility is not positive
sharpe_ratio = (cagr - risk_free_rate) / volatility if volatility > 0 else 0

print(f"\n⚖️ Risk-Adjusted Performance:")
print(f"📊 Sharpe Ratio: {sharpe_ratio:.2f}")
print(f"📈 Return per unit risk: {cagr / volatility if volatility > 0 else 0:.2f}")

## 5. Performance Visualizations

In [None]:
# 2x2 dashboard: NAV history, rolling 1-year return, daily-return histogram, drawdown
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle(f'Performance Analysis: {selected_scheme_name}', fontsize=16, fontweight='bold')
(nav_ax, rolling_ax), (hist_ax, drawdown_ax) = axes

# Top-left: NAV level over time
nav_ax.plot(fund_data['date'], fund_data['nav'], linewidth=2, color='blue')
nav_ax.set_title('NAV Growth Over Time', fontweight='bold')
nav_ax.set_xlabel('Date')
nav_ax.set_ylabel('NAV (₹)')

# Top-right: NAV change versus 252 rows earlier (about one year of trading days)
fund_data['rolling_return_1y'] = fund_data['nav'].pct_change(252)
rolling_ax.plot(fund_data['date'], fund_data['rolling_return_1y'] * 100, linewidth=2, color='green')
rolling_ax.set_title('1-Year Rolling Returns', fontweight='bold')
rolling_ax.set_xlabel('Date')
rolling_ax.set_ylabel('Return (%)')
rolling_ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)

# Bottom-left: histogram of finite daily returns, with the mean marked
daily_returns_clean = daily_returns[np.isfinite(daily_returns)]
mean_daily_pct = daily_returns_clean.mean() * 100
hist_ax.hist(daily_returns_clean * 100, bins=50, alpha=0.7, color='orange', edgecolor='black')
hist_ax.set_title('Daily Returns Distribution', fontweight='bold')
hist_ax.set_xlabel('Daily Return (%)')
hist_ax.set_ylabel('Frequency')
hist_ax.axvline(x=mean_daily_pct, color='red', linestyle='--',
                label=f'Mean: {mean_daily_pct:.3f}%')
hist_ax.legend()

# Bottom-right: percentage distance of NAV below its running maximum
fund_data['cumulative_max'] = fund_data['nav'].cummax()
fund_data['drawdown'] = (fund_data['nav'] / fund_data['cumulative_max'] - 1) * 100
drawdown_ax.fill_between(fund_data['date'], fund_data['drawdown'], 0,
                         alpha=0.6, color='red', label='Drawdown')
drawdown_ax.set_title('Drawdown Analysis', fontweight='bold')
drawdown_ax.set_xlabel('Date')
drawdown_ax.set_ylabel('Drawdown (%)')
drawdown_ax.legend()

# Shared cosmetics: light grid on every panel, slanted date labels on the time-series panels
for panel in axes.flat:
    panel.grid(True, alpha=0.3)
for panel in (nav_ax, rolling_ax, drawdown_ax):
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 6. Comparative Analysis

In [None]:
# Compare with category peers
print(f"🔍 Comparing with {fund_info['scheme_category']} category peers...\n")

# Get peer funds: same category as the selected fund, excluding the fund itself
peer_funds = df[
    (df['scheme_category'] == fund_info['scheme_category']) &
    (df['scheme_code'] != selected_scheme_code)
]

if len(peer_funds) > 0:
    # Calculate peer performance metrics
    peer_performance = []

    # This takes the first 10 peer codes in dataset order — it is a sample,
    # not a performance-ranked "top 10".
    for scheme_code in peer_funds['scheme_code'].unique()[:10]:
        # Slice the already-filtered peer frame rather than re-scanning the
        # full dataset once per peer.
        peer_data = peer_funds[peer_funds['scheme_code'] == scheme_code].sort_values('date')

        if len(peer_data) <= 252:  # Require more than ~1 year of observations
            continue

        peer_start = peer_data['nav'].iloc[0]
        peer_end = peer_data['nav'].iloc[-1]
        peer_days = (peer_data['date'].iloc[-1] - peer_data['date'].iloc[0]).days
        peer_years = peer_days / 365.25

        # Skip peers whose CAGR would be undefined: no elapsed time, a
        # zero/negative/missing starting NAV, or a missing ending NAV.
        if peer_years <= 0 or not (peer_start > 0) or pd.isna(peer_end):
            continue

        peer_cagr = (peer_end / peer_start) ** (1 / peer_years) - 1
        peer_performance.append({
            'scheme_name': peer_data['scheme_name'].iloc[0],
            'cagr': peer_cagr * 100,
            'years': peer_years
        })

    if peer_performance:
        peer_df = pd.DataFrame(peer_performance)
        peer_df = peer_df.sort_values('cagr', ascending=False)

        # Each CAGR spans that fund's own history ('years'), so the periods
        # being compared are not necessarily the same length.
        print(f"📊 Category Performance Comparison (CAGR):")
        print(f"🎯 Selected Fund: {cagr * 100:.2f}%")
        print(f"📈 Category Average: {peer_df['cagr'].mean():.2f}%")
        print(f"🥇 Category Best: {peer_df['cagr'].max():.2f}%")
        print(f"📉 Category Worst: {peer_df['cagr'].min():.2f}%")

        # Rank our fund: 1 + number of peers with a strictly higher CAGR
        our_fund_rank = (peer_df['cagr'] > cagr * 100).sum() + 1
        total_funds = len(peer_df) + 1
        percentile = (total_funds - our_fund_rank + 1) / total_funds * 100

        print(f"\n🏆 Fund Ranking:")
        print(f"   Rank: {our_fund_rank} out of {total_funds} funds")
        print(f"   Percentile: {percentile:.1f}th percentile")

        # Show top 5 performers (names truncated to 50 characters)
        print(f"\n🥇 Top 5 Category Performers:")
        for i, row in peer_df.head(5).iterrows():
            print(f"   {row['scheme_name'][:50]}{'...' if len(row['scheme_name']) > 50 else ''}: {row['cagr']:.2f}%")
    else:
        # Say so explicitly instead of ending the cell with no output
        print("⚠️ No sampled peer funds had enough data for a CAGR comparison")

else:
    print("⚠️ No peer funds found in the same category for comparison")

## 7. Monthly Performance Pattern

In [None]:
# Analyze monthly performance patterns
print("📅 Monthly Performance Analysis...\n")

# First/last NAV and date observed in each calendar month
# (fund_data is sorted by date, so 'first'/'last' are chronological)
fund_data['year_month'] = fund_data['date'].dt.to_period('M')
monthly_data = (
    fund_data.groupby('year_month')
    .agg(
        nav_start=('nav', 'first'),
        nav_end=('nav', 'last'),
        date_start=('date', 'first'),
        date_end=('date', 'last'),
    )
    .reset_index()
)

# Measure each month from the previous month's closing NAV, so the move between
# the prior month's last observation and this month's first one is not dropped.
# The very first month, and any month that follows a gap in the data, falls
# back to the month's own first NAV as the base.
previous_period = monthly_data['year_month'].shift(1)
follows_previous = (monthly_data['year_month'] - 1) == previous_period
base_nav = monthly_data['nav_end'].shift(1).where(follows_previous, monthly_data['nav_start'])
monthly_data['monthly_return'] = (monthly_data['nav_end'] / base_nav - 1) * 100
monthly_data['month'] = monthly_data['date_start'].dt.month
monthly_data['year'] = monthly_data['date_start'].dt.year

# Monthly statistics: mean / std / count of returns per calendar month (1-12)
monthly_stats = monthly_data.groupby('month')['monthly_return'].agg(['mean', 'std', 'count']).reset_index()
monthly_stats['month_name'] = pd.to_datetime(monthly_stats['month'], format='%m').dt.month_name()

print("📊 Average Monthly Returns by Month:")
for _, row in monthly_stats.iterrows():
    print(f"   {row['month_name']}: {row['mean']:.2f}% (±{row['std']:.2f}%) - {row['count']} observations")

# Best and worst individual months over the whole history
best_month = monthly_data.loc[monthly_data['monthly_return'].idxmax()]
worst_month = monthly_data.loc[monthly_data['monthly_return'].idxmin()]

print(f"\n🥇 Best Month: {best_month['year_month']} ({best_month['monthly_return']:.2f}%)")
print(f"📉 Worst Month: {worst_month['year_month']} ({worst_month['monthly_return']:.2f}%)")

In [None]:
# Side-by-side panels: mean return per calendar month, and the spread of returns per month
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: one bar per calendar month, green for a positive mean and red otherwise
bar_colors = ['green' if avg > 0 else 'red' for avg in monthly_stats['mean']]
ax1.bar(monthly_stats['month_name'], monthly_stats['mean'], color=bar_colors, alpha=0.7)
ax1.set_title('Average Monthly Returns by Month', fontweight='bold')
ax1.set_ylabel('Average Return (%)')
ax1.axhline(y=0, color='black', linestyle='-', alpha=0.5)

# Right panel: box plot of the individual monthly returns for each calendar month 1..12
calendar_months = range(1, 13)
returns_by_month = [
    monthly_data.loc[monthly_data['month'] == month_no, 'monthly_return'].values
    for month_no in calendar_months
]
month_labels = [
    pd.to_datetime(month_no, format='%m').strftime('%b') for month_no in calendar_months
]
ax2.boxplot(returns_by_month, labels=month_labels)
ax2.set_title('Monthly Return Distribution', fontweight='bold')
ax2.set_ylabel('Monthly Return (%)')
ax2.axhline(y=0, color='red', linestyle='--', alpha=0.5)

# Shared cosmetics for both panels
for panel in (ax1, ax2):
    panel.tick_params(axis='x', rotation=45)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Key Insights & Summary

In [None]:
# Roll the metrics computed in the earlier cells into one printed report
period_start = fund_data['date'].min().date()
period_end = fund_data['date'].max().date()

print(
    "📋 FUND ANALYSIS SUMMARY",
    "=" * 50,
    f"🎯 Fund: {selected_scheme_name}",
    f"🏭 AMC: {fund_info['amc_name']}",
    f"📊 Category: {fund_info['scheme_category']}",
    f"📅 Analysis Period: {period_start} to {period_end}",
    f"⏱️ Duration: {years_invested:.1f} years",
    sep="\n",
)

print(
    "\n📈 PERFORMANCE METRICS:",
    f"💹 Total Return: {total_return:.2f}%",
    f"📊 CAGR: {cagr * 100:.2f}%",
    f"🎯 Annualized Volatility: {volatility * 100:.2f}%",
    f"⚖️ Sharpe Ratio: {sharpe_ratio:.2f}",
    f"📉 Max Drawdown: {max_drawdown * 100:.2f}%",
    sep="\n",
)

growth_multiple = end_nav / start_nav
print(
    "\n💰 NAV JOURNEY:",
    f"🚀 Starting NAV: ₹{start_nav:.2f}",
    f"🎯 Ending NAV: ₹{end_nav:.2f}",
    f"📊 Growth Multiple: {growth_multiple:.2f}x",
    sep="\n",
)

# Investment scenarios: a ₹1 lakh lumpsum scaled by the NAV growth multiple;
# the SIP amount is only quoted in the message, no SIP value is computed.
initial_investment = 100000
final_value = initial_investment * growth_multiple
monthly_sip = 5000

print(
    "\n💼 INVESTMENT SCENARIOS:",
    f"🎯 ₹{initial_investment:,} lumpsum → ₹{final_value:,.0f} (Gain: ₹{final_value-initial_investment:,.0f})",
    f"📊 ₹{monthly_sip:,} monthly SIP would have generated significant returns",
    sep="\n",
)

# Risk bucket from annualized volatility: above 25% High, above 15% Medium, else Low
if volatility > 0.25:
    risk_level = "High"
elif volatility > 0.15:
    risk_level = "Medium"
else:
    risk_level = "Low"

# Sharpe bucket: above 1 Excellent, above 0.5 Good, else Average
if sharpe_ratio > 1:
    sharpe_label = 'Excellent'
elif sharpe_ratio > 0.5:
    sharpe_label = 'Good'
else:
    sharpe_label = 'Average'

print(
    "\n⚠️ RISK ASSESSMENT:",
    f"📊 Risk Level: {risk_level} (Volatility: {volatility * 100:.2f}%)",
    f"📉 Worst Drawdown: {max_drawdown * 100:.2f}%",
    f"🎯 Risk-Adjusted Performance: {sharpe_label}",
    sep="\n",
)

# Overall rating from CAGR: above 15% Excellent, above 10% Good, else Average
performance = "Excellent" if cagr > 0.15 else "Good" if cagr > 0.10 else "Average"

print(
    "\n🏆 CONCLUSION:",
    f"📈 Performance Rating: {performance}",
    f"💡 This fund delivered {performance.lower()} returns with {risk_level.lower()} risk during the analysis period",
    sep="\n",
)

print(f"\n📊 Dataset Coverage: {len(df):,} NAV observations across {df['scheme_code'].nunique():,} schemes")
print(f"⏰ Analysis completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 9. Export Results

In [None]:
# Save analysis results: headline metrics as JSON, per-day series as CSV
analysis_results = {
    'fund_name': selected_scheme_name,
    'amc_name': fund_info['amc_name'],
    'category': fund_info['scheme_category'],
    'analysis_period': f"{fund_data['date'].min().date()} to {fund_data['date'].max().date()}",
    'duration_years': years_invested,
    'total_return_pct': total_return,
    'cagr_pct': cagr * 100,
    'volatility_pct': volatility * 100,
    'sharpe_ratio': sharpe_ratio,
    'max_drawdown_pct': max_drawdown * 100,
    'start_nav': start_nav,
    'end_nav': end_nav,
    'growth_multiple': end_nav/start_nav,
    'data_points': len(fund_data)
}

# Save the summary metrics to JSON (json is imported with the other libraries
# at the top of the notebook). default=str stringifies any value the json
# module cannot encode natively.
with open('../data/processed/analytical/fund_analysis_results.json', 'w') as f:
    json.dump(analysis_results, f, indent=2, default=str)

# Save fund performance data to CSV.
# No visible cell adds 'year'/'month' to fund_data (they are created on
# monthly_data only), so use the dataset's own columns when they exist and
# otherwise derive them from 'date' instead of failing with a KeyError.
fund_summary = fund_data[['date', 'nav', 'nav_change_pct']].copy()
derived_parts = {
    'year': fund_data['date'].dt.year,
    'month': fund_data['date'].dt.month,
}
for part, derived in derived_parts.items():
    fund_summary[part] = fund_data[part] if part in fund_data.columns else derived
fund_summary.to_csv('../data/processed/analytical/fund_performance_data.csv', index=False)

print("💾 Analysis results saved to:")
print("   📊 fund_analysis_results.json")
print("   📈 fund_performance_data.csv")
print("\n✅ Analysis completed successfully!")