In [81]:
import os
import pandas as pd
import numpy as np

In [93]:
def _select_revenue_row(revenue_df, year, quarter):
    """Return the revenue row for (year, quarter), else the last row of that year, else None."""
    same_year = revenue_df[revenue_df['year'] == year]
    same_quarter = same_year[same_year['quarter'] == quarter]
    if not same_quarter.empty:
        return same_quarter.iloc[0]
    if not same_year.empty:
        # No quarterly figure: fall back to the last revenue row recorded for the year.
        return same_year.iloc[-1]
    return None


def merge_company_data(mkt_cap_dir, revenue_dir, output_dir="merged_company_data"):
    """
    Merge market cap and revenue data for each company where both datasets are available.

    A ticker is processed only when both `<ticker>_mkt_cap_quarter_end.csv` exists in
    `mkt_cap_dir` and `<ticker>_revenue.csv` exists in `revenue_dir`. Each market-cap row
    is paired with the revenue row of the same year and quarter; when there is none, the
    last revenue row of the same year is used; when the year has no revenue at all the
    market-cap row is skipped.

    Parameters
    ----------
    mkt_cap_dir : str
        Directory with market-cap CSVs (columns read: 'Date', 'MarketCap').
    revenue_dir : str
        Directory with revenue CSVs (columns read: 'date', 'revenue', optional
        'revenue_yoy_growth', optional 'year' / 'quarter').
    output_dir : str
        Directory that receives one `<ticker>_merged_data.csv` per merged ticker.
        Created if it does not exist.

    Returns
    -------
    dict
        Maps ticker -> merged DataFrame sorted by 'Date'. Tickers with no matching
        rows are reported on stdout and left out.
    """
    os.makedirs(output_dir, exist_ok=True)

    mkt_cap_suffix = '_mkt_cap_quarter_end.csv'
    revenue_suffix = '_revenue.csv'

    # Derive tickers by stripping the exact suffix that is later used to rebuild the
    # file name, so every ticker found here is guaranteed to map to an existing file.
    mkt_cap_tickers = {
        f[:-len(mkt_cap_suffix)] for f in os.listdir(mkt_cap_dir) if f.endswith(mkt_cap_suffix)
    }
    revenue_tickers = {
        f[:-len(revenue_suffix)] for f in os.listdir(revenue_dir) if f.endswith(revenue_suffix)
    }

    merged_companies = {}

    # Sorted so that log output and the returned dict have a reproducible order.
    for ticker in sorted(mkt_cap_tickers & revenue_tickers):
        print(f"Processing ticker: {ticker}")

        mkt_cap_df = pd.read_csv(os.path.join(mkt_cap_dir, f"{ticker}{mkt_cap_suffix}"))
        revenue_df = pd.read_csv(os.path.join(revenue_dir, f"{ticker}{revenue_suffix}"))

        # Keep only the part of 'Date' before the first space before parsing.
        mkt_cap_df['Date'] = pd.to_datetime(mkt_cap_df['Date'].str.split(' ').str[0])
        revenue_df['date'] = pd.to_datetime(revenue_df['date'])

        mkt_cap_df['year'] = mkt_cap_df['Date'].dt.year
        mkt_cap_df['quarter'] = mkt_cap_df['Date'].dt.quarter

        # The lookup below needs 'year' and 'quarter' on the revenue side too; derive
        # them from the parsed date when the CSV does not already provide them.
        if 'year' not in revenue_df.columns:
            revenue_df['year'] = revenue_df['date'].dt.year
        if 'quarter' not in revenue_df.columns:
            revenue_df['quarter'] = revenue_df['date'].dt.quarter

        merged_data = []
        for _, mkt_cap_row in mkt_cap_df.iterrows():
            revenue_row = _select_revenue_row(
                revenue_df, mkt_cap_row['year'], mkt_cap_row['quarter']
            )
            if revenue_row is None:
                continue

            merged_data.append({
                'Date': mkt_cap_row['Date'],
                'Year': mkt_cap_row['year'],
                'Quarter': mkt_cap_row['quarter'],
                'Ticker': ticker,
                'MarketCap': mkt_cap_row['MarketCap'],
                'Revenue': revenue_row['revenue'],
                'Revenue_YoY_Growth': revenue_row.get('revenue_yoy_growth', np.nan)
            })

        if merged_data:
            merged_df = pd.DataFrame(merged_data).sort_values(['Date'])
            merged_companies[ticker] = merged_df

            # Save merged data for each ticker to CSV
            output_file_path = os.path.join(output_dir, f"{ticker}_merged_data.csv")
            merged_df.to_csv(output_file_path, index=False)
            print(f"Successfully merged data for {ticker} and saved to {output_file_path}")
        else:
            print(f"No matching data found for {ticker}")

    return merged_companies


In [None]:
# Ad-hoc inspection: load one raw revenue CSV and show it as the cell output.
pd.read_csv('company_revenue_results/AAPL_revenue.csv')

In [None]:
import pandas as pd
import os
from glob import glob

# Directory containing the merged company files; the loop at the end of this cell
# globs it for '*_merged_data.csv'.
input_directory = 'merged_company_data'

# Function to calculate YoY growth and remove rows with NaN
def calculate_yoy_growth_and_update(file_path):
    """
    Add year-over-year growth columns to a merged company CSV and rewrite it in place.

    Reads `file_path`, sorts it by 'Date', adds 'YoY_MktCap_Growth' and
    'YoY_Revenue_Growth' (fractional change versus the previous row of the same
    'Quarter'), drops every row that contains a NaN in any column, and writes the
    result back to `file_path`.

    Parameters
    ----------
    file_path : str
        Path of a CSV with at least 'Date', 'Quarter', 'MarketCap' and 'Revenue'.

    Notes
    -----
    The file is overwritten, so calling this twice on the same file recomputes
    growth on the already-trimmed rows and drops more data.
    """
    data = pd.read_csv(file_path)

    # Sort by Date to ensure chronological order
    data = data.sort_values(by="Date").reset_index(drop=True)

    # Within one 'Quarter' group consecutive rows are already one year apart, so the
    # previous year's same quarter is one period back (periods=4 would compare against
    # four years earlier).
    by_quarter = data.groupby('Quarter')
    data['YoY_MktCap_Growth'] = by_quarter['MarketCap'].pct_change(periods=1)
    data['YoY_Revenue_Growth'] = by_quarter['Revenue'].pct_change(periods=1)

    # Drop rows with any NaN values (this includes the first year of every quarter)
    data = data.dropna()

    # Save the updated data back to the same file
    data.to_csv(file_path, index=False)
    print(f"Updated file with YoY growth data: {file_path}")

# Iterate over each merged data file in the directory.
# NOTE: calculate_yoy_growth_and_update overwrites each file after dropping NaN rows,
# so re-running this cell operates on already-trimmed files rather than the originals.
for file_path in glob(os.path.join(input_directory, '*_merged_data.csv')):
    calculate_yoy_growth_and_update(file_path)


In [83]:
def calculate_growth_indicator(value, mean, std_dev, num_std=0.5):
    """
    Classify a value relative to a band around a company-specific mean.

    Parameters
    ----------
    value : float
        Observation to classify.
    mean : float
        Centre of the band.
    std_dev : float
        Standard deviation used to size the band.
    num_std : float, optional
        Half-width of the band in standard deviations (default 0.5).

    Returns
    -------
    int
        1 if `value` is more than `num_std` standard deviations above `mean`,
        -1 if it is more than `num_std` standard deviations below,
        0 if it lies inside the band or any of value/mean/std_dev is missing (NaN).
    """
    if pd.isna(value) or pd.isna(mean) or pd.isna(std_dev):
        return 0  # Missing inputs are treated as neutral

    band = num_std * std_dev

    if value > mean + band:
        return 1
    if value < mean - band:
        return -1
    return 0


In [84]:
import os
import pandas as pd

def calculate_growth_indicator(value, mean, std_dev, num_std=0.07):
    """
    Calculate the growth indicator based on company-specific mean and standard deviation.

    Parameters
    ----------
    value : float
        Observation to classify.
    mean : float
        Company-specific mean of the series.
    std_dev : float
        Company-specific standard deviation of the series.
    num_std : float, optional
        Width of the neutral band on each side of the mean, in standard
        deviations (default 0.07).

    Returns
    -------
    int
        1 if value is more than `num_std` standard deviations above the mean,
        -1 if it is more than `num_std` standard deviations below, and 0 if in
        between or if any of value/mean/std_dev is NaN.
    """
    if pd.isna(value) or pd.isna(mean) or pd.isna(std_dev):
        return 0  # Return 0 if any inputs are missing (NaN)

    offset = num_std * std_dev
    threshold_high = mean + offset
    threshold_low = mean - offset

    if value > threshold_high:
        return 1
    elif value < threshold_low:
        return -1
    else:
        return 0

def calculate_beta_covariance(df, period_months):
    """
    Calculate the lag-1 covariance of a rolling beta series for one company.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Date' and 'MarketCap' columns. It is copied; the caller's
        frame is not modified.
    period_months : int
        Period used to size the rolling window; the window is `period_months * 3` rows.

    Returns
    -------
    float or int
        Covariance between the rolling beta series and itself shifted by one row,
        or 0 when fewer than two beta values exist, the covariance is NaN, or any
        exception is raised (the exception is printed, not re-raised).
    """
    try:
        df = df.copy()

        # Ensure data is sorted by date
        df = df.sort_values('Date')

        # Calculate returns
        df['Returns'] = df['MarketCap'].pct_change()

        # "Market" return built from this frame only: the weighted returns are summed
        # into one scalar that is then broadcast to every row.
        # NOTE(review): a constant column has zero rolling variance, which the
        # replace() below turns into NaN, so Beta ends up empty and the function
        # returns 0. A real beta needs an external market return series, which this
        # signature does not receive -- confirm the intended design.
        df['Market_Value'] = df['MarketCap'].sum()
        df['Market_Weight'] = df['MarketCap'] / df['Market_Value']
        df['Market_Returns'] = (df['Returns'] * df['Market_Weight']).sum()

        # NOTE(review): multiplying by 3 converts quarters to months, not months to
        # quarters; callers in this notebook pass 2 and 16 -- confirm the intended unit.
        rolling_window = period_months * 3

        # Calculate rolling betas
        rolling_cov = df['Returns'].rolling(window=rolling_window).cov(df['Market_Returns'])
        rolling_market_var = df['Market_Returns'].rolling(window=rolling_window).var()

        # Avoid dividing by zero
        df['Beta'] = rolling_cov / rolling_market_var.replace(0, np.nan)

        # Series.cov() requires a second series; calling it with no argument raised
        # TypeError on every call. Use the lag-1 covariance, matching the later
        # definition of this function in the notebook.
        beta_values = df['Beta'].dropna()
        if len(beta_values) <= 1:
            return 0.0
        beta_cov = beta_values.cov(beta_values.shift(1))

        return float(beta_cov) if not pd.isna(beta_cov) else 0

    except Exception as e:
        # Deliberate best-effort: report and fall back to 0 so ranking can continue.
        print(f"Error in beta covariance calculation: {e}")
        return 0

In [None]:
def load_merged_company_data(merged_data_dir):
    """
    Read every '*_merged_data.csv' file in a directory into a dict of DataFrames.

    The key is the file name up to the '_merged_data.csv' marker; the 'Date'
    column is parsed as dates while reading. Other files are ignored.
    """
    marker = "_merged_data.csv"
    return {
        entry.split(marker)[0]: pd.read_csv(
            os.path.join(merged_data_dir, entry), parse_dates=['Date']
        )
        for entry in os.listdir(merged_data_dir)
        if entry.endswith(marker)
    }

def calculate_company_rankings(merged_companies, output_file='company_rankings.csv'):
    """
    Calculate per-company scores on five parameters and save them to CSV.

    Parameters
    ----------
    merged_companies : dict
        Maps company/ticker -> DataFrame with 'Date' (datetime), 'MarketCap'
        and 'Revenue_YoY_Growth' columns.
    output_file : str
        Path of the CSV that receives the resulting table.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed company with the columns
        'Company', 'MktCap_Growth_Score', 'Revenue_Growth_Score',
        'Weighted_Simple_Variance', 'Beta_6M_Covariance', 'Beta_4Y_Covariance'
        and 'Date_Range'. Companies that raise during processing are reported
        on stdout and skipped.
    """
    rankings = []

    for company, df in merged_companies.items():
        print(f"Processing company: {company}")
        try:
            # sort_values returns a new frame, so the columns added below do not
            # leak into the caller's DataFrame.
            df = df.sort_values('Date')

            # 1. Market Cap YoY Growth Indicator (change versus four rows earlier, in %)
            df['MktCap_YoY_Change'] = df['MarketCap'].pct_change(periods=4) * 100
            mkt_cap_mean = df['MktCap_YoY_Change'].mean()
            mkt_cap_std = df['MktCap_YoY_Change'].std()
            df['MktCap_Growth_Indicator'] = df['MktCap_YoY_Change'].apply(
                lambda x: calculate_growth_indicator(x, mkt_cap_mean, mkt_cap_std)
            )

            # 2. Revenue YoY Growth Indicator
            revenue_mean = df['Revenue_YoY_Growth'].mean()
            revenue_std = df['Revenue_YoY_Growth'].std()
            df['Revenue_Growth_Indicator'] = df['Revenue_YoY_Growth'].apply(
                lambda x: calculate_growth_indicator(x, revenue_mean, revenue_std)
            )

            # 3. Gap between the market-cap-weighted and the simple average change.
            # A weighted average divides by the total weight; the earlier
            # `(change * cap).mean()` divided by the row count instead, which left
            # the result scaled by market cap. Weights are limited to rows that
            # actually have a YoY value.
            has_change = df['MktCap_YoY_Change'].notna()
            weights = df.loc[has_change, 'MarketCap']
            weight_total = weights.sum()
            if weight_total:
                weighted_avg = (
                    (df.loc[has_change, 'MktCap_YoY_Change'] * weights).sum() / weight_total
                )
            else:
                weighted_avg = np.nan  # no usable rows: keep the metric undefined
            simple_avg = mkt_cap_mean
            variance_avg = abs(weighted_avg - simple_avg)

            # 4 & 5. Beta covariances
            print(f"Calculating 6-month beta covariance for {company}")
            beta_6m_cov = calculate_beta_covariance(df, 2)

            # Label matches the 'Beta_4Y_Covariance' column this value feeds.
            print(f"Calculating 4-year beta covariance for {company}")
            beta_4y_cov = calculate_beta_covariance(df, 16)

            rankings.append({
                'Company': company,
                'MktCap_Growth_Score': df['MktCap_Growth_Indicator'].mean(),
                'Revenue_Growth_Score': df['Revenue_Growth_Indicator'].mean(),
                'Weighted_Simple_Variance': variance_avg,
                'Beta_6M_Covariance': beta_6m_cov,
                'Beta_4Y_Covariance': beta_4y_cov,
                'Date_Range': f"{df['Date'].min().strftime('%Y-%m-%d')} to {df['Date'].max().strftime('%Y-%m-%d')}"
            })

            print(f"Successfully processed {company}")

        except Exception as e:
            # Best-effort: one bad company must not abort the whole ranking run.
            print(f"Error processing company {company}: {e}")
            continue

    # Create rankings DataFrame and save to CSV
    rankings_df = pd.DataFrame(rankings)
    rankings_df.to_csv(output_file, index=False)
    return rankings_df

# Load every '*_merged_data.csv' in merged_data_dir into a {ticker: DataFrame} dict
merged_data_dir = "merged_company_data"
merged_companies = load_merged_company_data(merged_data_dir)

# Calculate company rankings; the table is also written to output_file
output_file = "company_rankings.csv"
rankings_df = calculate_company_rankings(merged_companies, output_file)

rankings_df.head()  # Display the first few rows of the result for verification purposes


In [None]:
import os
import pandas as pd

def load_merged_company_data(merged_data_dir):
    """
    Collect the merged per-company CSVs of a directory.

    Returns a dict keyed by the part of the file name that precedes
    '_merged_data.csv'; each value is the file read with pandas, with the
    'Date' column parsed as dates. Files without that suffix are skipped.
    """
    suffix = "_merged_data.csv"
    frames_by_ticker = {}
    for name in os.listdir(merged_data_dir):
        if not name.endswith(suffix):
            continue
        csv_path = os.path.join(merged_data_dir, name)
        frames_by_ticker[name.split(suffix)[0]] = pd.read_csv(csv_path, parse_dates=['Date'])
    return frames_by_ticker

def calculate_growth_indicator(value, mean, std_dev, num_std=0.07):
    """
    Calculate the growth indicator based on company-specific mean and standard deviation.

    Parameters
    ----------
    value : float
        Observation to classify.
    mean : float
        Company-specific mean.
    std_dev : float
        Company-specific standard deviation.
    num_std : float, optional
        Number of standard deviations that separates "neutral" from
        "above"/"below" (default 0.07).

    Returns
    -------
    int
        1 if value is more than `num_std` standard deviations above the mean,
        -1 if it is more than `num_std` standard deviations below, and 0 if in
        between or when any of the three inputs is NaN.
    """
    if pd.isna(value) or pd.isna(mean) or pd.isna(std_dev):
        return 0  # Return 0 if any inputs are missing (NaN)

    margin = num_std * std_dev

    if value > mean + margin:
        return 1
    if value < mean - margin:
        return -1
    return 0

def calculate_beta_covariance(df, period_months):
    """
    Lag-1 covariance of a rolling beta series computed from one company's frame.

    Works on a date-sorted copy of `df` (needs 'Date' and 'MarketCap'). The
    rolling window is `period_months * 3` rows. Returns 0 when fewer than two
    beta values exist, when the covariance is NaN, or when any exception occurs
    (the exception is printed and swallowed).
    """
    try:
        frame = df.copy().sort_values('Date')

        # Period-over-period change of market cap
        frame['Returns'] = frame['MarketCap'].pct_change()

        # Weight each row by its share of the summed market cap, then collapse the
        # weighted returns to one scalar that is broadcast to every row.
        # NOTE(review): a constant 'Market_Returns' column has zero rolling variance,
        # which is replaced by NaN below -- confirm that this is the intended "market".
        total_cap = frame['MarketCap'].sum()
        frame['Market_Weight'] = frame['MarketCap'] / total_cap
        frame['Market_Returns'] = (frame['Returns'] * frame['Market_Weight']).sum()

        # NOTE(review): x3 turns quarters into months, not the reverse -- confirm unit.
        window = period_months * 3

        covariance = frame['Returns'].rolling(window=window).cov(frame['Market_Returns'])
        market_variance = frame['Market_Returns'].rolling(window=window).var()

        # Zero variance becomes NaN so the division cannot divide by zero
        frame['Beta'] = covariance / market_variance.replace(0, np.nan)

        betas = frame['Beta'].dropna()
        if len(betas) <= 1:
            return 0.0

        lagged_cov = betas.cov(betas.shift(1))
        if pd.isna(lagged_cov):
            return 0
        return float(lagged_cov)

    except Exception as e:
        print(f"Error in beta covariance calculation: {e}")
        return 0

def calculate_company_rankings(merged_companies, output_file='company_rankings.csv'):
    """
    Calculate per-company scores on four parameters and save them to CSV.

    Parameters
    ----------
    merged_companies : dict
        Maps company/ticker -> DataFrame with 'Date' (datetime), 'MarketCap'
        and 'Revenue_YoY_Growth' columns.
    output_file : str
        Path of the CSV that receives the resulting table.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed company with the columns
        'Company', 'MktCap_Growth_Score', 'Revenue_Growth_Score',
        'Beta_6M_Covariance', 'Beta_4Y_Covariance' and 'Date_Range'.
        Companies that raise during processing are reported on stdout and skipped.
    """
    rankings = []

    for company, df in merged_companies.items():
        print(f"Processing company: {company}")
        try:
            # sort_values returns a new frame, so the columns added below do not
            # leak into the caller's DataFrame.
            df = df.sort_values('Date')

            # 1. Market Cap YoY Growth Indicator (change versus four rows earlier, in %)
            df['MktCap_YoY_Change'] = df['MarketCap'].pct_change(periods=4) * 100
            mkt_cap_mean = df['MktCap_YoY_Change'].mean()
            mkt_cap_std = df['MktCap_YoY_Change'].std()
            df['MktCap_Growth_Indicator'] = df['MktCap_YoY_Change'].apply(
                lambda x: calculate_growth_indicator(x, mkt_cap_mean, mkt_cap_std)
            )

            # 2. Revenue YoY Growth Indicator
            revenue_mean = df['Revenue_YoY_Growth'].mean()
            revenue_std = df['Revenue_YoY_Growth'].std()
            df['Revenue_Growth_Indicator'] = df['Revenue_YoY_Growth'].apply(
                lambda x: calculate_growth_indicator(x, revenue_mean, revenue_std)
            )

            # 3 & 4. Beta covariances
            print(f"Calculating 6-month beta covariance for {company}")
            beta_6m_cov = calculate_beta_covariance(df, 2)

            # Label matches the 'Beta_4Y_Covariance' column this value feeds
            # (the message previously said "5-year").
            print(f"Calculating 4-year beta covariance for {company}")
            beta_4y_cov = calculate_beta_covariance(df, 16)

            rankings.append({
                'Company': company,
                'MktCap_Growth_Score': df['MktCap_Growth_Indicator'].mean(),
                'Revenue_Growth_Score': df['Revenue_Growth_Indicator'].mean(),
                'Beta_6M_Covariance': beta_6m_cov,
                'Beta_4Y_Covariance': beta_4y_cov,
                'Date_Range': f"{df['Date'].min().strftime('%Y-%m-%d')} to {df['Date'].max().strftime('%Y-%m-%d')}"
            })

            print(f"Successfully processed {company}")

        except Exception as e:
            # Best-effort: one bad company must not abort the whole ranking run.
            print(f"Error processing company {company}: {e}")
            continue

    # Create rankings DataFrame and save to CSV
    rankings_df = pd.DataFrame(rankings)
    rankings_df.to_csv(output_file, index=False)
    return rankings_df

# Load every '*_merged_data.csv' in merged_data_dir into a {ticker: DataFrame} dict
merged_data_dir = "merged_company_data"
merged_companies = load_merged_company_data(merged_data_dir)

# Calculate company rankings; the table is also written to output_file
output_file = "company_rankings.csv"
rankings_df = calculate_company_rankings(merged_companies, output_file)

rankings_df # Display the full rankings table as the cell output


In [None]:
# Scratch check: repeats the per-company steps of calculate_company_rankings for the
# single AAPL file so the intermediate columns can be inspected.
# Relies on calculate_growth_indicator and calculate_beta_covariance from earlier cells.
# NOTE(review): 'Date' is read without parse_dates here, unlike load_merged_company_data.
df=pd.read_csv('merged_company_data/AAPL_merged_data.csv')
df = df.sort_values('Date')

# 1. Market Cap YoY Growth Indicator
df['MktCap_YoY_Change'] = df['MarketCap'].pct_change(periods=4) * 100
mkt_cap_mean = df['MktCap_YoY_Change'].mean()
mkt_cap_std = df['MktCap_YoY_Change'].std()
df['MktCap_Growth_Indicator'] = df['MktCap_YoY_Change'].apply(
    lambda x: calculate_growth_indicator(x, mkt_cap_mean, mkt_cap_std)
)

# 2. Revenue YoY Growth Indicator
revenue_mean = df['Revenue_YoY_Growth'].mean()
revenue_std = df['Revenue_YoY_Growth'].std()
df['Revenue_Growth_Indicator'] = df['Revenue_YoY_Growth'].apply(
    lambda x: calculate_growth_indicator(x, revenue_mean, revenue_std)
)

# 3 & 4. Beta covariances
print(f"Calculating 6-month beta covariance for AAPL")
beta_6m_cov = calculate_beta_covariance(df, 2)

# NOTE(review): the message says "5-year" but the result is stored in beta_4y_cov.
print(f"Calculating 5-year beta covariance for AAPL")
beta_4y_cov = calculate_beta_covariance(df, 16)

# Displays the whole frame, including the indicator columns added above
df
            