# Import the necessary Libraries

In [None]:
import pandas as pd
import numpy as np
import random
import requests
import time

## To start our analysis and to comply with compute restrictions I will first analyse the data for a total of 5 companies out of 500

I will start by studying 5 companies in the same industry in order to gain more industry-specific insights. Studying companies within a single industry shows me the patterns that emerge in that specific industry; once that industry is complete, I can generalize the approach first to other companies in the same industry and then to other industries.

### Advantages:
1. **Domain-Specific patterns**
2. Comparable analysis
3. Controlled complexity
4. Faster iteration
5. Clear baseline

### Information Technology Industry
1. **Microsoft** 
2. **ServiceNow**
3. **AMD**
4. **Salesforce**
5. **Palantir**

# DCF model

To start, I will conduct the DCF analysis on the yearly data only. Once the Calendar Year (CY) accounts are complete, I will move forward and include the quarterly data as well.

### Revenue

In [1]:

# Load the yearly financial statements for the company under study.
yearly = pd.read_csv('./data/financial_statement/MSFT_master.csv')

# Revenue was referenced under two different labels in this cell, so at most one
# of the lookups could succeed. Resolve the column once, accepting either label,
# and fail with an explicit message when neither is present.
REVENUE_COLUMNS = [
    'Income Statement - Revenue - Total Revenue',
    'Revenue - Total Revenue',
]
revenue_col = next((c for c in REVENUE_COLUMNS if c in yearly.columns), None)
if revenue_col is None:
    raise KeyError(f"No total revenue column found; tried {REVENUE_COLUMNS}")

# Start a fresh dataframe that will hold every DCF input.
dcf = pd.DataFrame()
dcf['frame'] = yearly['frame']
dcf['Revenue'] = yearly[revenue_col]

# Period-over-period revenue growth, computed from the same series stored in
# dcf['Revenue']. The first row has no prior period, so its NaN is set to 0.
dcf['Revenue_Growth_Rate'] = dcf['Revenue'].pct_change().fillna(0)



NameError: name 'pd' is not defined

### COGS
If the data has a column called `Cost - Total Cost of Revenue`, I want to populate the new `COGS` column directly from it. If that column is not available, I want to sum `Cost of Product Revenue` and `Cost of Service Revenue` instead. Before making the addition I want to make sure that these accounts are available.

This way the code is more generalizable given the different columns names that I can encounter.

In [38]:
# Create the COGS column from whichever cost-of-revenue data is available.
if 'Cost - Total Cost of Revenue' in yearly.columns:
    # Preferred source: the reported total cost of revenue.
    dcf['COGS'] = yearly['Cost - Total Cost of Revenue']
else:
    # Otherwise rebuild COGS from its product/service components, using
    # whichever of the two exists.
    product_cost_exists = 'Cost of Product Revenue' in yearly.columns
    service_cost_exists = 'Cost of Service Revenue' in yearly.columns

    if product_cost_exists and service_cost_exists:
        dcf['COGS'] = yearly['Cost of Product Revenue'] + yearly['Cost of Service Revenue']
    elif product_cost_exists:
        dcf['COGS'] = yearly['Cost of Product Revenue']
    elif service_cost_exists:
        dcf['COGS'] = yearly['Cost of Service Revenue']
    else:
        # No cost columns available. Use NaN rather than None: None yields an
        # object column and the ratio below would raise TypeError, whereas NaN
        # keeps the column numeric and simply propagates as "missing".
        dcf['COGS'] = float('nan')
        print("Warning: No cost of revenue columns found")

# COGS expressed as a share of revenue.
dcf['COGS pct of Revenue'] = dcf['COGS']/dcf['Revenue']




### Gross Profit Margin

In [39]:
# Gross profit (stored under the 'Gross Margin' label): take the reported figure
# when the statement provides one, otherwise derive it as Revenue - COGS.
has_reported_gross_profit = 'Gross - Gross Profit' in yearly.columns
dcf['Gross Margin'] = (
    yearly['Gross - Gross Profit']
    if has_reported_gross_profit
    else dcf['Revenue'] - dcf['COGS']
)



### Operating Expenses

I have a few columns whose names start with `Operating Expenses - <Specific Name>`.
For all the columns that start with `Operating Expenses`, I want to bring them into my DCF dataframe, and next to each of those accounts I want a new column that computes Operating Expense / Revenue.

In [40]:
# Every column whose label starts with 'Operating Expenses' is an opex line item.
op_ex = [col for col in yearly.columns if col.startswith('Operating Expenses')]

# The previous filter (`name == 'A' or 'B' or 'C'`) was always truthy, because a
# non-empty string literal is truthy on its own, so every opex column was copied
# and the `else` branch could never run. The dead condition is removed; all opex
# columns are carried over, which is also what the total below sums over.
for col in op_ex:
    # Strip the 'Operating Expenses -' prefix to get a short label for the ratio.
    name = col.replace('Operating Expenses -','').strip()
    dcf[col] = yearly[col]
    ratio_name = f'{name}/Revenue'
    dcf[ratio_name] = yearly[col]/dcf['Revenue']

# Row-wise total across all opex line items (NaN entries are skipped by sum).
dcf['Total Operating Expense'] = dcf[op_ex].sum(axis=1)

### EBIT (Operating Income)

In [41]:
# EBIT: prefer the reported operating income; when the statement does not carry
# it, fall back to gross profit less total operating expenses.
reported_ebit_col = 'Operating - Operating Income'
if reported_ebit_col not in yearly.columns:
    dcf['EBIT'] = dcf['Gross Margin'] - dcf['Total Operating Expense']
else:
    dcf['EBIT'] = yearly[reported_ebit_col]

### Tax Expense

In [42]:
# Tax expense: use the reported figure when available.
if 'Tax - Income Tax Expense/Benefit' in yearly.columns:
    dcf['Tax'] = yearly['Tax - Income Tax Expense/Benefit']
else:
    # The corporate tax rate in the US is around 21% as of 2024.
    # EBIT is built in the `dcf` frame by the previous cell; `yearly` has no
    # 'EBIT' column, so it must be read from `dcf` here.
    dcf['Tax'] = dcf['EBIT']*0.21


### Depreciation and Amortization
Usually found in the CashFlow Statement

In [43]:
# Depreciation & amortization and its size relative to revenue.
if 'Operating - Depreciation and Amortization' in yearly.columns:
    dcf['D&A'] = yearly['Operating - Depreciation and Amortization']
    da_to_revenue = dcf['D&A']/dcf['Revenue']
    # Named like the other ratio columns in this notebook (e.g. 'CAPEX/Revenue').
    dcf['D&A/Revenue'] = da_to_revenue
    # Legacy column kept for backward compatibility. Despite its name it holds
    # D&A divided by revenue, not a growth rate.
    dcf['pct_growth_revenue'] = da_to_revenue
else:
    # Surface the gap instead of passing silently: no 'D&A' column is created.
    print("Warning: No depreciation and amortization column found")

### Net Working Capital (NWC)
For NWC I need Accounts Receivable, Inventory, Accounts Payable, Accrued Expenses and Deferred Liabilities

In [44]:
# Balance-sheet accounts that feed net working capital. The text before the
# first '-' says which side of the balance sheet the account sits on.
cols = [
    'Assets - Accounts Receivable, Net (Current)',
    'Assets - Inventory',
    'Liabilities - Accounts Payable',
]
dcf['NWC'] = 0
for account in cols:
    # Accounts missing from the statement are simply left out of NWC.
    if account not in yearly.columns:
        continue

    pieces = account.split('-')
    side = pieces[0].strip()
    label = pieces[1].strip()
    dcf[label] = yearly[account]

    if side == 'Liabilities':
        # Liabilities reduce NWC and are scaled against COGS.
        dcf[f'{label}/COGS'] = dcf[label]/dcf['COGS']
        dcf['NWC'] = dcf['NWC'] - dcf[label]
    else:
        # Assets increase NWC and are scaled against revenue.
        dcf[f'{label}/Revenue'] = dcf[label]/dcf['Revenue']
        dcf['NWC'] = dcf['NWC'] + dcf[label]
        


In [45]:
# Period-over-period change in net working capital. The first row has no prior
# period to compare against, so its NaN is replaced with 0.
dcf['Delta NWC'] = dcf['NWC'].diff().fillna(0)

In [46]:
# Capital expenditures and their share of revenue; both columns are only
# created when the statement reports capex.
capex_source = 'Investing - Capital Expenditures'
if capex_source in yearly.columns:
    capex = yearly[capex_source]
    dcf['CAPEX'] = capex
    dcf['CAPEX/Revenue'] = dcf['CAPEX']/dcf['Revenue']

### FCF
$$ FCF = EBIT - Tax + D\&A - \Delta NWC - CAPEX $$

In [7]:
import pandas as pd
# Load the S&P 500 table; the cells below use its 'GICS Sector',
# 'GICS Sub-Industry' and 'Symbol' columns.
sp = pd.read_csv('./data/sp500.csv')

In [8]:
# List the distinct GICS sectors present in the table.
sp['GICS Sector'].unique()

array(['Industrials', 'Health Care', 'Information Technology',
       'Utilities', 'Financials', 'Materials', 'Consumer Discretionary',
       'Real Estate', 'Communication Services', 'Consumer Staples',
       'Energy'], dtype=object)

# API call for sectors where a different financial model is important

Here I am categorizing the different industries by the analysis method used to value them.
- In **Information Technology** the norm is to compute the **DCF** model
- In **Financials** we need to compute the *Dividend Discount Model* (**DDM**)
- In **Real Estate** we need Net Asset Values (**NAV**)

This is how we will categorize and price companies in different sectors, because this is how a fundamental analyst would do it.


In [9]:
# GICS sector names grouped by the valuation approach planned for each group.
# Sectors valued with a standard DCF model:
std_dcf = ['Information Technology','Communication Services','Consumer Discretionary','Consumer Staples','Health Care','Industrials','Materials']
# Sectors grouped under the Dividend Discount Model (DDM):
ddm = ['Utilities','Financials']
# Sectors grouped under Net Asset Value (NAV):
nav = ['Real Estate','Energy']

In [10]:
# Slice the S&P 500 table into the sectors that are valued with a standard DCF.
it = sp[sp['GICS Sector']=='Information Technology']
cs = sp[sp['GICS Sector']=='Communication Services']
cd = sp[sp['GICS Sector']=='Consumer Discretionary']
css = sp[sp['GICS Sector']=='Consumer Staples']
hc = sp[sp['GICS Sector']=='Health Care']
ind = sp[sp['GICS Sector']=='Industrials']
mat = sp[sp['GICS Sector']=='Materials']
df = pd.concat([it, cs, cd,css,hc,ind,mat], ignore_index=True)

# Keep one representative company per sub-industry.
# GroupBy.first() returns the first NON-NULL value of each column independently,
# so a company with a missing field would have that field filled from a
# different company in the same sub-industry. Selecting the first actual row per
# sub-industry keeps every record internally consistent. The result keeps the
# same layout as before: sub-industry as the first column, sorted alphabetically,
# with a fresh 0..n-1 index.
df_one_per_subindustry = (
    df.dropna(subset=['GICS Sub-Industry'])
      .drop_duplicates(subset='GICS Sub-Industry', keep='first')
      .set_index('GICS Sub-Industry')
      .sort_index()
      .reset_index()
)

In [13]:
# Display the one-company-per-sub-industry table built above.
df_one_per_subindustry

Unnamed: 0.1,GICS Sub-Industry,Unnamed: 0,Symbol,Security,GICS Sector,Headquarters Location,Date added,CIK,Founded
0,Advertising,254,IPG,Interpublic Group of Companies (The),Communication Services,"New York City, New York",1992-10-01,51644,1961 (1930)
1,Aerospace & Defense,54,AXON,Axon Enterprise,Industrials,"Scottsdale, Arizona",2023-05-04,1069183,1993
2,Agricultural & Farm Machinery,141,DE,Deere & Company,Industrials,"Moline, Illinois",1957-03-04,315189,1837
3,Agricultural Products & Services,43,ADM,Archer Daniels Midland,Consumer Staples,"Chicago, Illinois",1957-03-04,7084,1902
4,Air Freight & Logistics,79,CHRW,C.H. Robinson,Industrials,"Eden Prairie, Minnesota",2007-03-02,1043277,1905
...,...,...,...,...,...,...,...,...,...
86,Technology Distributors,92,CDW,CDW,Information Technology,"Vernon Hills, Illinois",2019-09-23,1402057,1984
87,"Technology Hardware, Storage & Peripherals",39,AAPL,Apple Inc.,Information Technology,"Cupertino, California",1982-11-30,320193,1977
88,Tobacco,21,MO,Altria,Consumer Staples,"Richmond, Virginia",1957-03-04,764180,1985
89,Trading Companies & Distributors,190,FAST,Fastenal,Industrials,"Winona, Minnesota",2008-09-15,815556,1967


In [14]:
import pandas as pd
import time

# Each time, I manually change the sector name to the one I want to analyze and save the data.
sp_analysis = df_one_per_subindustry.copy()

# Initialize counter and the per-company result columns.
ix = 0
sp_analysis['accounts'] = None
sp_analysis['question_prompt'] = None
# Created up front so the column exists whether or not any company fails.
sp_analysis['error'] = None

# Build one DCF-mapping prompt per company from the columns of its statement file.
for idx, row in sp_analysis.iterrows():
    symbol = row['Symbol']
    sub_industry = row['GICS Sub-Industry']
    
    # Retry configuration
    max_retries = 3
    retry_count = 0
    success = False
    
    while retry_count < max_retries and not success:
        try:
            print(f'Processing {symbol} (Attempt {retry_count + 1}/{max_retries})')
            
            # Read the financial statement; only its column names are used here.
            df = pd.read_csv(f'./data/financial_statement/{symbol}_master.csv')
            accounts = df.columns.to_list()
            
            # Store accounts in the dataframe
            sp_analysis.at[idx, 'accounts'] = accounts
            
            # Create the prompt
            accounts_str = ', '.join(accounts)
            prompt = (
                "Hey Gemini I want a concise answer. \n" +
                " The Goal is to create a standard DCF model using the mappings you will provide. Take into account the account names for the specific industry and create a mapping code like shown in the example bellow "+
                "Make sure the answer is given in JSON format "+
                "\n ------ \n" +
                f"Here are account names for companies that are in {sub_industry} sub-industry. \n" +
                # The leading newline separates the account list from the
                # instruction; without it the last account name ran straight
                # into "I want these accounts...".
                accounts_str + "\nI want these accounts to be mapped like this: \n" +
                """I want you to use these accounts and create a mapping like this
                Make sure to always write Sub-Industry name in Pascal format: .... for example {
                "**ApplicationSoftware**": {
                "revenue": [
                "IncomeStatement - Revenues - TotalRevenue"
                ],
                "cogs": [
                "IncomeStatement - Expenses - CostOfRevenue"
                ],
                "operating_expenses": [
                "IncomeStatement - Expenses - SalesAndMarketing",
                "IncomeStatement - Expenses - GeneralAndAdministrative"
                ],
                "d_and_a": [
                "CashFlow - OperatingActivities - DepreciationAndAmortization"
                ],
                "capex": [
                "CashFlow - InvestingActivities - CapitalExpenditures"
                ],
                "stock_based_compensation": [
                "CashFlow - OperatingActivities - ShareBasedCompensation"
                ],
                "nwc_operating_assets": [
                "Assets - AccountsReceivable",
                "Assets - ContractAssets",
                "Assets - CapitalizedContractCosts",
                "Assets - PrepaidExpenses"
                ],
                "nwc_operating_liabilities": [
                "Liabilities - AccountsPayable",
                "Liabilities - AccruedLiabilities",
                "Liabilities - DeferredRevenue"
                ]
                }
                }
                Make sure the answer is just the mapping no introduction sentence no ending sentence just the mapping code"""+ """Don't add any other comments i.e ( // Note: DepreciationAndAmortization, ShareBasedCompensation, and RestructuringCharges are typically excluded
                // from core operating expenses for normalized free cash flow calculation.)"""
                            )
            
            sp_analysis.at[idx, 'question_prompt'] = str(prompt)
            
            # Mark as successful so the retry loop exits.
            success = True
            print(f'Successfully processed {symbol}')
            
        except FileNotFoundError:
            print(f'File not found for {symbol}. Skipping...')
            # A missing file will not appear on retry, so move on immediately.
            break
            
        except Exception as e:
            retry_count += 1
            print(f'Error processing {symbol}: {e}')
            
            if retry_count < max_retries:
                print(f'Waiting 60 seconds before retry...')
                time.sleep(60)  # Wait 1 minute before retrying
            else:
                print(f'Max retries reached for {symbol}. Moving to next company.')
                # Keep the last error message next to the company it belongs to.
                sp_analysis.at[idx, 'error'] = str(e)


Processing IPG (Attempt 1/3)
Successfully processed IPG
Processing AXON (Attempt 1/3)
Successfully processed AXON
Processing DE (Attempt 1/3)
Successfully processed DE
Processing ADM (Attempt 1/3)
Successfully processed ADM
Processing CHRW (Attempt 1/3)
Successfully processed CHRW
Processing ROST (Attempt 1/3)
Successfully processed ROST
Processing LULU (Attempt 1/3)
Successfully processed LULU
Processing ADBE (Attempt 1/3)
Successfully processed ADBE
Processing F (Attempt 1/3)
Successfully processed F
Processing APTV (Attempt 1/3)
Successfully processed APTV
Processing AZO (Attempt 1/3)
Successfully processed AZO
Processing ABBV (Attempt 1/3)
Successfully processed ABBV
Processing TAP (Attempt 1/3)
Successfully processed TAP
Processing FOXA (Attempt 1/3)
Successfully processed FOXA
Processing AMZN (Attempt 1/3)
Successfully processed AMZN
Processing AOS (Attempt 1/3)
Successfully processed AOS
Processing CHTR (Attempt 1/3)
Successfully processed CHTR
Processing JBHT (Attempt 1/3)
Succ

In [11]:
import pandas as pd
import time
import json
import os


def to_pascal_case(label):
    """Convert a sub-industry label into a PascalCase identifier.

    '&' becomes the word 'And'; every other non-alphanumeric character (hyphens,
    commas, slashes, ...) is treated as a word separator, so punctuation never
    ends up inside the key. Each word is capitalized with str.capitalize().

    Example: 'Technology Hardware, Storage & Peripherals'
             -> 'TechnologyHardwareStorageAndPeripherals'
    """
    spaced = ''.join(ch if ch.isalnum() else ' ' for ch in label.replace('&', ' And '))
    return ''.join(word.capitalize() for word in spaced.split())


# Target accounts we're looking for: short key -> exact column label expected
# in the statement files.
target_accounts = {
    'frame': 'frame',
    'total_assets': 'Assets - TotalAssets',
    'total_liabilities': 'Liabilities - TotalLiabilities', 
    'total_equity': 'Equity - TotalStockholdersEquity',
    'total_revenue': 'IncomeStatement - Revenues - TotalRevenue',
    'gross_profit': 'IncomeStatement - OtherIncomeExpense - GrossProfit',
    'operating_income': 'IncomeStatement - OtherIncomeExpense - OperatingIncome',
    'net_income': 'IncomeStatement - NetIncome - NetIncome',
    'comprehensive_income': 'IncomeStatement - NetIncome - ComprehensiveIncome',
    'basic_eps': 'IncomeStatement - EarningsPerShare - BasicEps',
    'diluted_eps': 'IncomeStatement - EarningsPerShare - DilutedEps',
    'shares_basic': 'IncomeStatement - EarningsPerShare - WeightedAverageSharesBasic',
    'shares_diluted': 'IncomeStatement - EarningsPerShare - WeightedAverageSharesDiluted'
}

# Each time, I manually change the sector name to the one I want to analyze and save the data.
sp_analysis = df_one_per_subindustry.copy()

# Initialize counter and accounts storage
ix = 0
sp_analysis['accounts'] = None
sp_analysis['question_prompt'] = None
sp_analysis['available_target_accounts'] = None

# Process each company: record its columns, note which target accounts match
# exactly, and build the mapping prompt.
for idx, row in sp_analysis.iterrows():
    symbol = row['Symbol']
    sub_industry = row['GICS Sub-Industry']
    
    # Retry configuration
    max_retries = 3
    retry_count = 0
    success = False
    
    while retry_count < max_retries and not success:
        try:
            print(f'Processing {symbol} (Attempt {retry_count + 1}/{max_retries})')
            
            # Read the financial statement; only its column names are used here.
            df = pd.read_csv(f'./data/financial_statement/{symbol}_master.csv')
            accounts = df.columns.to_list()
            
            # Store accounts in the dataframe
            sp_analysis.at[idx, 'accounts'] = accounts
            
            # Find which target accounts are available (exact matches)
            available_targets = {}
            for key, target_col in target_accounts.items():
                if target_col in accounts:
                    available_targets[key] = target_col
            
            # Store available target accounts
            sp_analysis.at[idx, 'available_target_accounts'] = available_targets
            
            # Create the prompt for mapping
            accounts_str = ', '.join(accounts)
            
            # Convert sub_industry to PascalCase (punctuation-free JSON key)
            pascal_sub_industry = to_pascal_case(sub_industry)
            
            prompt = (
                "Analyze the financial statement columns and create a JSON mapping for these specific accounts. "
                "Return ONLY the JSON mapping with no additional text or explanations.\n\n"
                f"Industry: {sub_industry}\n"
                f"Available columns: {accounts_str}\n\n"
                "Find exact matches or closest equivalents for these target accounts:\n"
                "1. frame - Time period identifier\n"
                "2. Assets - TotalAssets - Total assets from balance sheet\n"
                "3. Liabilities - TotalLiabilities - Total liabilities from balance sheet\n"
                "4. Equity - TotalStockholdersEquity - Total stockholders equity\n"
                "5. IncomeStatement - Revenues - TotalRevenue - Total revenue/sales\n"
                "6. IncomeStatement - OtherIncomeExpense - GrossProfit - Gross profit\n"
                "7. IncomeStatement - OtherIncomeExpense - OperatingIncome - Operating income\n"
                "8. IncomeStatement - NetIncome - NetIncome - Net income\n"
                "9. IncomeStatement - NetIncome - ComprehensiveIncome - Comprehensive income\n"
                "10. IncomeStatement - EarningsPerShare - BasicEps - Basic earnings per share\n"
                "11. IncomeStatement - EarningsPerShare - DilutedEps - Diluted earnings per share\n"
                "12. IncomeStatement - EarningsPerShare - WeightedAverageSharesBasic - Weighted avg shares basic\n"
                "13. IncomeStatement - EarningsPerShare - WeightedAverageSharesDiluted - Weighted avg shares diluted\n\n"
                f'Return the mapping in this exact JSON format:\n'
                '{\n'
                f'  "{pascal_sub_industry}": {{\n'
                '    "frame": ["exact_column_name"],\n'
                '    "total_assets": ["exact_column_name"],\n'
                '    "total_liabilities": ["exact_column_name"],\n'
                '    "total_equity": ["exact_column_name"],\n'
                '    "total_revenue": ["exact_column_name"],\n'
                '    "gross_profit": ["exact_column_name"],\n'
                '    "operating_income": ["exact_column_name"],\n'
                '    "net_income": ["exact_column_name"],\n'
                '    "comprehensive_income": ["exact_column_name"],\n'
                '    "basic_eps": ["exact_column_name"],\n'
                '    "diluted_eps": ["exact_column_name"],\n'
                '    "shares_basic": ["exact_column_name"],\n'
                '    "shares_diluted": ["exact_column_name"]\n'
                '  }\n'
                '}\n\n'
                "Rules: Use exact column names from the list above. If a column doesn't exist, use empty array []. No comments or extra text."
            )
            
            sp_analysis.at[idx, 'question_prompt'] = str(prompt)
            
            # Mark as successful so the retry loop exits.
            success = True
            print(f'Successfully processed {symbol}')
            print(f'Available target accounts: {len(available_targets)}/{len(target_accounts)}')
            
        except FileNotFoundError:
            print(f'File not found for {symbol}. Skipping...')
            # A missing file will not appear on retry, so move on immediately.
            break
            
        except Exception as e:
            retry_count += 1
            print(f'Error processing {symbol}: {e}')
            
            if retry_count < max_retries:
                print(f'Waiting 60 seconds before retry...')
                time.sleep(60)  # Wait 1 minute before retrying
            else:
                print(f'Max retries reached for {symbol}. Moving to next company.')
                # Keep the last error message next to the company it belongs to.
                sp_analysis.at[idx, 'error'] = str(e)

# Create a summary of what accounts are commonly available
print("\n" + "="*50)
print("SUMMARY OF ACCOUNT AVAILABILITY")
print("="*50)

# Count, per target account, how many companies expose it under the exact label.
account_availability = {}
for key in target_accounts.keys():
    account_availability[key] = 0

for idx, row in sp_analysis.iterrows():
    # Companies whose file was missing keep None here and are not counted.
    if row['available_target_accounts'] is not None:
        for key in row['available_target_accounts'].keys():
            account_availability[key] += 1

total_companies = len(sp_analysis)
print(f"\nTotal companies processed: {total_companies}")
print("\nAccount availability across companies:")
for account, count in account_availability.items():
    # Guard against an empty company table, which would divide by zero.
    percentage = (count / total_companies) * 100 if total_companies else 0.0
    print(f"  {account}: {count}/{total_companies} ({percentage:.1f}%)")

# Save results (timestamped so earlier runs are not overwritten)
output_file = f'financial_mapping_analysis_{int(time.time())}.csv'
sp_analysis.to_csv(output_file, index=False)
print(f"\nResults saved to: {output_file}")

# Create a sample JSON structure for manual review; only companies with at
# least one exact target match are included.
sample_mapping = {}
for idx, row in sp_analysis.iterrows():
    if row['available_target_accounts'] is not None and len(row['available_target_accounts']) > 0:
        symbol = row['Symbol']
        sub_industry = row['GICS Sub-Industry']
        pascal_sub_industry = to_pascal_case(sub_industry)
        
        sample_mapping[symbol] = {
            "sub_industry": sub_industry,
            "pascal_sub_industry": pascal_sub_industry,
            "available_accounts": row['available_target_accounts']
        }

# Save sample mapping for reference
sample_file = f'sample_mapping_{int(time.time())}.json'
with open(sample_file, 'w') as f:
    json.dump(sample_mapping, f, indent=2)
print(f"Sample mapping saved to: {sample_file}")

print(f"\nProcessing complete! Review the prompts and use them with your AI to generate the final JSON mappings.")

Processing IPG (Attempt 1/3)
Successfully processed IPG
Available target accounts: 12/13
Processing AXON (Attempt 1/3)
Successfully processed AXON
Available target accounts: 13/13
Processing DE (Attempt 1/3)
Successfully processed DE
Available target accounts: 13/13
Processing ADM (Attempt 1/3)
Successfully processed ADM
Available target accounts: 5/13
Processing CHRW (Attempt 1/3)
Successfully processed CHRW
Available target accounts: 12/13
Processing ROST (Attempt 1/3)
Successfully processed ROST
Available target accounts: 6/13
Processing LULU (Attempt 1/3)
Successfully processed LULU
Available target accounts: 7/13
Processing ADBE (Attempt 1/3)
Successfully processed ADBE
Available target accounts: 13/13
Processing F (Attempt 1/3)
Successfully processed F
Available target accounts: 5/13
Processing APTV (Attempt 1/3)
Successfully processed APTV
Available target accounts: 5/13
Processing AZO (Attempt 1/3)
Successfully processed AZO
Available target accounts: 6/13
Processing ABBV (Atte

In [12]:
import pandas as pd
import google.generativeai as genai
import time
import os

# The API key comes from the environment so it is never written into the notebook.
api_key = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=api_key)

# Sampling settings passed to the model:
#   temperature        - kept very low so answers are as repeatable as possible
#   top_p              - nucleus-sampling cutoff
#   response_mime_type - asks the API to return JSON
generation_settings = dict(
    temperature=0.01,
    top_p=0.95,
    response_mime_type='application/json',
)

model = genai.GenerativeModel(
    'gemini-2.5-flash-preview-05-20',
    generation_config=generation_settings,
)

In [13]:
import pandas as pd
import google.generativeai as genai
import time
import os
import json
import re


def parse_json_from_response(response_text):
    """
    Extract and parse JSON content from Gemini's response text.

    Candidates are tried in order until one parses:
      1. the whole (stripped) response, which covers plain JSON output,
         including top-level arrays;
      2. the contents of the first markdown code fence (```json ... ``` or a
         bare ``` ... ```);
      3. the span from the first '{' to the last '}' in the text.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON value (dict, list, ...), or None when the input is
        empty / not a string or when no candidate is valid JSON. Failures are
        reported with a printed message rather than an exception.
    """
    if not isinstance(response_text, str) or not response_text.strip():
        print("Error parsing JSON: empty or non-text response")
        return None

    text = response_text.strip()

    # Whole response first: a bare JSON array of objects would otherwise be cut
    # into invalid JSON by the brace search below.
    candidates = [text]

    fenced = re.findall(r'```(?:json)?\s*(.*?)\s*```', text, re.DOTALL)
    if fenced:
        # Take the first fenced block
        candidates.append(fenced[0].strip())

    braced = re.search(r'\{.*\}', text, re.DOTALL)
    if braced:
        candidates.append(braced.group(0).strip())

    last_error = None
    json_str = text
    for json_str in candidates:
        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            last_error = e

    print(f"Error parsing JSON: {last_error}")
    print(f"JSON string attempted: {json_str[:300]}...")  # Show first 300 chars
    return None

def process_financial_mapping_with_gemini(df, output_file='financial_mappings.json'):
    """
    Send each row's mapping prompt to Gemini and collect the parsed JSON answers.

    Args:
        df: DataFrame with 'Symbol', 'question_prompt' and 'GICS Sub-Industry'
            columns, one row per company.
        output_file: Path of the combined JSON file. A '<name>_summary.json' is
            written next to it, and a '<name>_fallback.txt' is attempted if the
            JSON files cannot be written.

    Returns:
        (df_copy, all_mappings): a copy of `df` with 'gemini_response' and
        'parsed_mapping' columns added, and the dict of all mappings. Failed
        rows are stored under the key '<symbol>_ERROR'.

    Notes:
        Uses the module-level `model` and `parse_json_from_response`.
        Rows without a usable prompt are recorded as errors without calling the
        API. Per-row failures are caught and recorded, so one bad row does not
        stop the run.
    """
    # Dictionary to store all financial mappings
    all_mappings = {}
    response_texts = []  # For dataframe column
    parsed_mappings = []  # For dataframe column
    
    total_rows = len(df)
    
    # `position` is a 1-based progress counter that does not depend on the
    # dataframe's index labels.
    for position, (index, row) in enumerate(df.iterrows(), start=1):
        # Read the identifiers before the try block so the error handler below
        # always has this row's values, never unbound or stale ones.
        symbol = row.get('Symbol', f'row_{index}')
        sub_industry = row.get('GICS Sub-Industry')
        prompt = row.get('question_prompt')
        
        # No prompt (e.g. the statement file was missing): record the failure
        # without spending an API call or the error back-off delay.
        if not isinstance(prompt, str) or not prompt.strip():
            print(f"✗ Skipping {symbol}: no prompt available")
            error_info = {
                "error": "Missing prompt",
                "symbol": symbol,
                "sub_industry": sub_industry,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
            }
            all_mappings[f"{symbol}_ERROR"] = error_info
            response_texts.append("Error: Missing prompt")
            parsed_mappings.append(error_info)
            continue
        
        try:
            print(f"\n{'='*60}")
            print(f"Processing {position}/{total_rows}: {symbol} - {sub_industry}")
            
            # Send prompt to Gemini
            response = model.generate_content(prompt)
            response_text = response.text
            
            print('Response received from Gemini')
            
            # Parse JSON from response
            parsed_json = parse_json_from_response(response_text)
            
            if parsed_json:
                # If the parsed JSON has a single key (industry name),
                # merge it into our main dictionary
                if isinstance(parsed_json, dict) and len(parsed_json) == 1:
                    all_mappings.update(parsed_json)
                    # Get the key name for storage
                    industry_key = list(parsed_json.keys())[0]
                    parsed_mappings.append(parsed_json[industry_key])
                else:
                    # Otherwise, use the symbol as the key
                    all_mappings[symbol] = parsed_json
                    parsed_mappings.append(parsed_json)
                
                print(f"✓ Successfully parsed JSON mapping for {symbol}")
                response_texts.append(json.dumps(parsed_json, indent=2))
            else:
                print(f"✗ Failed to parse JSON for {symbol}")
                error_info = {
                    "error": "Failed to parse JSON", 
                    "raw_response": response_text[:500],
                    "symbol": symbol,
                    "sub_industry": sub_industry
                }
                all_mappings[f"{symbol}_ERROR"] = error_info
                response_texts.append(response_text)
                parsed_mappings.append(error_info)
            
            print(f"{'='*60}\n")
            
            # Rate limiting - wait between requests
            time.sleep(2)
            
        except Exception as e:
            error_msg = f"Error: {str(e)}"
            print(f"✗ Error processing {symbol}: {str(e)}")
            
            # Store error in the mappings
            error_info = {
                "error": str(e),
                "symbol": symbol,
                "sub_industry": sub_industry,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
            }
            all_mappings[f"{symbol}_ERROR"] = error_info
            response_texts.append(error_msg)
            parsed_mappings.append(error_info)
            
            # Longer wait on error
            time.sleep(10)
    
    # Write the combined JSON to file
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_mappings, f, indent=2, ensure_ascii=False)
        print(f"\n✓ Financial mappings JSON file created: {output_file}")
        
        # Also create a summary file
        summary_file = output_file.replace('.json', '_summary.json')
        summary = {
            "total_companies": total_rows,
            "successful_mappings": len([k for k in all_mappings.keys() if not k.endswith('_ERROR')]),
            "failed_mappings": len([k for k in all_mappings.keys() if k.endswith('_ERROR')]),
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
            "industries_processed": list(set(df['GICS Sub-Industry'].tolist()))
        }
        
        with open(summary_file, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)
        print(f"✓ Summary file created: {summary_file}")
        
    except Exception as e:
        print(f"\n✗ Error writing JSON file: {e}")
        # Fallback: write as pretty-printed string
        fallback_file = output_file.replace('.json', '_fallback.txt')
        with open(fallback_file, 'w', encoding='utf-8') as f:
            f.write(json.dumps(all_mappings, indent=2, ensure_ascii=False))
        print(f"✓ Fallback file created: {fallback_file}")
    
    # Add responses to a copy of the dataframe (one entry was appended per row)
    df_copy = df.copy()
    df_copy['gemini_response'] = response_texts
    df_copy['parsed_mapping'] = parsed_mappings
    
    return df_copy, all_mappings

def validate_financial_mappings(mappings_dict):
    """
    Validate the financial mappings and provide a report.

    Args:
        mappings_dict: dict of ``key -> mapping``. Keys ending in ``_ERROR``
            are treated as failed requests. Every other value is expected to
            be a dict of ``target account -> list of column names``.

    Returns:
        dict with these keys:
            total_industries: number of entries whose key does not end in ``_ERROR``.
            complete_mappings: entries with at least 80% of target accounts mapped.
            partial_mappings: entries with at least 30% (and below 80%) mapped.
            failed_mappings: ``_ERROR`` entries, entries that are not dicts or
                that carry an ``error`` key, and entries below 30% coverage.
            account_coverage: per-entry ``mapped`` / ``missing`` account lists
                and the ``coverage_ratio``.
            missing_accounts: ``target account -> list of entry keys`` in which
                that account has no mapped column.
    """
    validation_report = {
        "total_industries": 0,
        "complete_mappings": 0,
        "partial_mappings": 0,
        "failed_mappings": 0,
        "account_coverage": {},
        "missing_accounts": {}
    }

    # Target accounts we're looking for
    target_accounts = [
        'frame', 'total_assets', 'total_liabilities', 'total_equity',
        'total_revenue', 'gross_profit', 'operating_income', 'net_income',
        'comprehensive_income', 'basic_eps', 'diluted_eps', 'shares_basic', 'shares_diluted'
    ]

    for industry_key, mapping in mappings_dict.items():
        if industry_key.endswith('_ERROR'):
            validation_report["failed_mappings"] += 1
            continue

        validation_report["total_industries"] += 1

        # An entry that is not a usable mapping (wrong type, or an error record
        # stored under a normal key) is a failure; without this it would be
        # counted in the total but in none of the three buckets.
        if not isinstance(mapping, dict) or 'error' in mapping:
            validation_report["failed_mappings"] += 1
            continue

        # An account counts as mapped when its value is non-empty. Plain
        # truthiness avoids calling len() on values that have no length.
        mapped_accounts = [account for account in target_accounts if mapping.get(account)]
        missing_accounts = [account for account in target_accounts if not mapping.get(account)]

        # Classify mapping completeness
        coverage_ratio = len(mapped_accounts) / len(target_accounts)
        if coverage_ratio >= 0.8:  # 80% or more
            validation_report["complete_mappings"] += 1
        elif coverage_ratio >= 0.3:  # 30% or more
            validation_report["partial_mappings"] += 1
        else:
            validation_report["failed_mappings"] += 1

        validation_report["account_coverage"][industry_key] = {
            "mapped": mapped_accounts,
            "missing": missing_accounts,
            "coverage_ratio": coverage_ratio
        }

        # Invert the per-entry view so the report also shows, per account,
        # which entries lack it.
        for account in missing_accounts:
            validation_report["missing_accounts"].setdefault(account, []).append(industry_key)

    return validation_report

# Run the Gemini account-mapping pass over sp_analysis and report on the result.
print(
    "Starting financial mapping processing with Gemini...",
    "This may take several minutes depending on the number of companies...",
    sep="\n",
)

# The processing function returns the annotated dataframe and the combined
# mapping dictionary; the dictionary is also written to the JSON file named here.
df_processed, financial_mappings = process_financial_mapping_with_gemini(
    sp_analysis,
    'financial_account_mappings.json',
)

# Score every mapping against the list of target accounts.
validation_report = validate_financial_mappings(financial_mappings)

print(
    "\n" + "=" * 60,
    "PROCESSING COMPLETE - VALIDATION REPORT",
    "=" * 60,
    f"Total industries processed: {validation_report['total_industries']}",
    f"Complete mappings (80%+ coverage): {validation_report['complete_mappings']}",
    f"Partial mappings (30-79% coverage): {validation_report['partial_mappings']}",
    f"Failed mappings (<30% coverage): {validation_report['failed_mappings']}",
    sep="\n",
)

# Persist the dataframe that now carries the model responses.
df_processed.to_csv('financial_mapping_results.csv', index=False)
print("\n✓ Processed dataframe saved to: financial_mapping_results.csv")

# Persist the validation report next to it.
with open('mapping_validation_report.json', 'w', encoding='utf-8') as f:
    json.dump(validation_report, f, indent=2, ensure_ascii=False)
print("✓ Validation report saved to: mapping_validation_report.json")

# Closing summary of every artifact produced by this cell.
print(
    "\n" + "=" * 60,
    "FILES CREATED:",
    "1. financial_account_mappings.json - Main mapping file",
    "2. financial_account_mappings_summary.json - Processing summary",
    "3. financial_mapping_results.csv - Dataframe with responses",
    "4. mapping_validation_report.json - Validation analysis",
    "=" * 60,
    sep="\n",
)

print("\nProcessing complete! Review the JSON files for your financial account mappings.")

Starting financial mapping processing with Gemini...
This may take several minutes depending on the number of companies...

Processing 1/91: IPG - Advertising
Response received from Gemini
✓ Successfully parsed JSON mapping for IPG


Processing 2/91: AXON - Aerospace & Defense
Response received from Gemini
✓ Successfully parsed JSON mapping for AXON


Processing 3/91: DE - Agricultural & Farm Machinery
Response received from Gemini
✓ Successfully parsed JSON mapping for DE


Processing 4/91: ADM - Agricultural Products & Services
Response received from Gemini
✓ Successfully parsed JSON mapping for ADM


Processing 5/91: CHRW - Air Freight & Logistics
Response received from Gemini
✓ Successfully parsed JSON mapping for CHRW


Processing 6/91: ROST - Apparel Retail
Response received from Gemini
✓ Successfully parsed JSON mapping for ROST


Processing 7/91: LULU - Apparel, Accessories & Luxury Goods
Response received from Gemini
✓ Successfully parsed JSON mapping for LULU


Processing 8/9

In [6]:
import json
import time

def process_dataframe_with_gemini(df, file_name, prompt_column='question_prompt', max_retries=3):
    """
    Send each row's prompt to Gemini, retrying on errors, and store the replies.

    Rows whose ``content`` value is an empty string are not sent to the model;
    an empty string is recorded for them instead. When every attempt for a row
    fails, an ``"Error after N attempts: ..."`` text is recorded for that row.

    Args:
        df: DataFrame with the columns ``section``, ``content`` and
            ``prompt_column``. A ``gemini_response`` column is added in place.
        file_name: path of the JSON file that receives the list of responses.
        prompt_column: name of the column holding the prompt text.
        max_retries: maximum number of attempts per row (expected to be >= 1).

    Returns:
        The same DataFrame object, with the ``gemini_response`` column added.
    """
    responses = []
    total_rows = len(df)

    # Progress is counted by position: index labels are not guaranteed to be
    # the integers 0..n-1 (filtered frames, string labels), so ``label + 1``
    # would be misleading or raise a TypeError.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"Processing row {position}/{total_rows}: {row['section']}")

        retry_count = 0
        success = False

        while retry_count < max_retries and not success:
            try:
                prompt = row[prompt_column]

                if row['content'] == '':
                    # Nothing to analyse for this row; skip the API call.
                    response_text = ""
                else:
                    response = model.generate_content(prompt)
                    response_text = response.text

                responses.append(response_text)
                success = True
                time.sleep(0.01)

            except Exception as e:
                retry_count += 1
                print(f"Error on attempt {retry_count}: {str(e)}")

                if retry_count < max_retries:
                    # Wait longer when the error text mentions a rate limit.
                    wait_time = 65 if "rate limit" in str(e).lower() else 5
                    print(f"Waiting {wait_time} seconds before retry...")
                    time.sleep(wait_time)
                else:
                    # Out of attempts: record the failure so the result list
                    # stays aligned with the dataframe rows.
                    responses.append(f"Error after {retry_count} attempts: {str(e)}")

    df['gemini_response'] = responses

    with open(file_name, 'w', encoding='utf-8') as f:
        json.dump(responses, f, ensure_ascii=False, indent=2)

    return df

In [None]:
# Prompt template asking Gemini to map a company's financial-statement columns
# onto 13 target accounts and to answer with JSON only.
# NOTE(review): sub_industry, accounts_str and pascal_sub_industry are not
# assigned in this cell, so they must already exist in the kernel; the deep
# indentation suggests this was lifted from inside a function. Confirm before
# running the cell on its own.
prompt = (
                "Analyze the financial statement columns and create a JSON mapping for these specific accounts. "
                "Return ONLY the JSON mapping with no additional text or explanations.\n\n"
                f"Industry: {sub_industry}\n"
                f"Available columns: {accounts_str}\n\n"
                "Find exact matches or closest equivalents for these target accounts:\n"
                "1. frame - Time period identifier\n"
                "2. Assets - TotalAssets - Total assets from balance sheet\n"
                "3. Liabilities - TotalLiabilities - Total liabilities from balance sheet\n"
                "4. Equity - TotalStockholdersEquity - Total stockholders equity\n"
                "5. IncomeStatement - Revenues - TotalRevenue - Total revenue/sales\n"
                "6. IncomeStatement - OtherIncomeExpense - GrossProfit - Gross profit\n"
                "7. IncomeStatement - OtherIncomeExpense - OperatingIncome - Operating income\n"
                "8. IncomeStatement - NetIncome - NetIncome - Net income\n"
                "9. IncomeStatement - NetIncome - ComprehensiveIncome - Comprehensive income\n"
                "10. IncomeStatement - EarningsPerShare - BasicEps - Basic earnings per share\n"
                "11. IncomeStatement - EarningsPerShare - DilutedEps - Diluted earnings per share\n"
                "12. IncomeStatement - EarningsPerShare - WeightedAverageSharesBasic - Weighted avg shares basic\n"
                "13. IncomeStatement - EarningsPerShare - WeightedAverageSharesDiluted - Weighted avg shares diluted\n\n"
                f'Return the mapping in this exact JSON format:\n'
                '{\n'
                f'  "{pascal_sub_industry}": {{\n'
                '    "frame": ["exact_column_name"],\n'
                '    "total_assets": ["exact_column_name"],\n'
                '    "total_liabilities": ["exact_column_name"],\n'
                '    "total_equity": ["exact_column_name"],\n'
                '    "total_revenue": ["exact_column_name"],\n'
                '    "gross_profit": ["exact_column_name"],\n'
                '    "operating_income": ["exact_column_name"],\n'
                '    "net_income": ["exact_column_name"],\n'
                '    "comprehensive_income": ["exact_column_name"],\n'
                '    "basic_eps": ["exact_column_name"],\n'
                '    "diluted_eps": ["exact_column_name"],\n'
                '    "shares_basic": ["exact_column_name"],\n'
                '    "shares_diluted": ["exact_column_name"]\n'
                '  }\n'
                '}\n\n'
                "Rules: Use exact column names from the list above. If a column doesn't exist, use empty array []. No comments or extra text."
            )

In [15]:
# Summary statistics for sp_analysis; describe() covers the numeric columns by default.
sp_analysis.describe()

Unnamed: 0.1,Unnamed: 0,CIK
count,91.0,91.0
mean,152.076923,806798.9
std,130.805473,605579.2
min,0.0,1800.0
25%,43.5,70024.5
50%,107.0,866787.0
75%,229.0,1304052.0
max,481.0,1996810.0


In [16]:
import pandas as pd
import google.generativeai as genai
import time
import os
import json
import re

# Configure the Gemini client from the GEMINI_API_KEY environment variable.
# os.getenv returns None when the variable is unset, so a missing key is not
# detected on this line.
api_key = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=api_key)
# Module-level model handle used by the processing functions defined below.
model = genai.GenerativeModel('gemini-2.5-flash-preview-05-20')

def parse_json_from_response(response_text):
    """
    Extract JSON content from Gemini's response, handling markdown code blocks.

    Candidates are tried in this order and the first one that parses wins:
      1. the first fenced block tagged exactly ``json``;
      2. the first fenced block with no tag or with ``json`` in any letter case;
      3. the whole response, stripped;
      4. the text from the first ``{`` to the last ``}``, then from the first
         ``[`` to the last ``]`` (JSON surrounded by explanatory prose).

    Args:
        response_text: raw text returned by the model.

    Returns:
        The decoded JSON value, or None when the input is not a string or no
        candidate can be decoded.
    """
    if not isinstance(response_text, str):
        print(f"Error parsing JSON: expected text, got {type(response_text).__name__}")
        return None

    stripped_text = response_text.strip()
    candidates = []

    # 1. Fence tagged ```json. Tried first so it keeps priority over any
    #    untagged fence that appears earlier in the response.
    tagged_blocks = re.findall(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
    if tagged_blocks:
        candidates.append(tagged_blocks[0].strip())

    # 2. Fence without a tag, or tagged JSON / Json.
    generic_blocks = re.findall(
        r'```(?:json)?\s*(.*?)\s*```', response_text, re.DOTALL | re.IGNORECASE
    )
    if generic_blocks:
        candidates.append(generic_blocks[0].strip())

    # 3. The entire response.
    candidates.append(stripped_text)

    # 4. Outermost object / array embedded in surrounding prose.
    for opener, closer in (('{', '}'), ('[', ']')):
        start = stripped_text.find(opener)
        end = stripped_text.rfind(closer)
        if start != -1 and end > start:
            candidates.append(stripped_text[start:end + 1])

    first_failure = None
    already_tried = set()
    for json_str in candidates:
        if json_str in already_tried:
            continue
        already_tried.add(json_str)
        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            # Remember only the first failure: it belongs to the most likely
            # candidate and is the most useful one to report.
            if first_failure is None:
                first_failure = (e, json_str)

    error, attempted = first_failure
    print(f"Error parsing JSON: {error}")
    print(f"JSON string attempted: {attempted[:200]}...")  # Show first 200 chars
    return None

def process_dataframe_with_gemini(df, output_file='gemini_responses.json',
                                  request_delay=1, error_delay=60):
    """
    Process each row's prompt and save Gemini's responses as one JSON object.

    For every row the ``question_prompt`` text is sent to the model and the
    reply is parsed with ``parse_json_from_response``. A parsed dict with
    exactly one key is merged into the combined object under its own key; any
    other parsed value is stored under the row's ``GICS Sub-Industry``. Parse
    failures and request errors are stored as ``{"error": ...}`` records under
    the sub-industry.

    Args:
        df: DataFrame with ``question_prompt`` and ``GICS Sub-Industry``
            columns. A ``gemini_response`` column is added in place.
        output_file: path of the JSON file to write.
        request_delay: seconds to sleep after each successful request.
        error_delay: seconds to sleep after a failed request.

    Returns:
        The same DataFrame object, with the ``gemini_response`` column added.
    """
    # Dictionary to store all responses
    all_responses = {}
    response_texts = []  # For dataframe column

    # Process each row
    for _, row in df.iterrows():
        # Read the label before the try block so the error handler can use it
        # without indexing the row a second time.
        sub_industry = row['GICS Sub-Industry']
        try:
            prompt = row['question_prompt']

            print(f"\n{'='*50}")
            print(f"Processing {sub_industry}")

            # Send prompt to Gemini
            response = model.generate_content(prompt)
            response_text = response.text

            print('Response received')

            # Parse JSON from response
            parsed_json = parse_json_from_response(response_text)

            # The parser signals failure with None; a valid but empty JSON
            # value ({} or []) is still a successful parse.
            if parsed_json is not None:
                # If the parsed JSON has a single key (like the examples show),
                # merge it into our main dictionary
                if isinstance(parsed_json, dict) and len(parsed_json) == 1:
                    all_responses.update(parsed_json)
                else:
                    # Otherwise, use the sub-industry as the key
                    all_responses[sub_industry] = parsed_json

                print(f"Successfully parsed JSON for {sub_industry}")
                response_texts.append(json.dumps(parsed_json))
            else:
                print(f"Failed to parse JSON for {sub_industry}")
                all_responses[sub_industry] = {"error": "Failed to parse JSON", "raw_response": response_text[:500]}
                response_texts.append(response_text)

            print(f"{'='*50}\n")

            time.sleep(request_delay)

        except Exception as e:
            error_msg = f"Error: {str(e)}"
            print(f"Error processing row {sub_industry}: {str(e)}")

            # Store error in the responses so the row stays accounted for
            all_responses[sub_industry] = {"error": str(e)}
            response_texts.append(error_msg)

            time.sleep(error_delay)  # Longer wait on error

    # Write the combined JSON to file
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_responses, f, indent=2, ensure_ascii=False)
        print(f"\n✓ Valid JSON file created: {output_file}")
    except Exception as e:
        print(f"\n✗ Error writing JSON file: {e}")
        # Fallback: write as pretty-printed string.
        # NOTE(review): this retries the same file with the same serializer,
        # so it presumably only helps with transient failures.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(json.dumps(all_responses, indent=2, ensure_ascii=False))

    # Add responses to dataframe (in place, one entry per processed row)
    df['gemini_response'] = response_texts

    return df

# Alternative function if you want to create a JSON array instead
def process_dataframe_with_gemini_array(df, output_file='gemini_responses.json',
                                        request_delay=1, error_delay=60):
    """
    Process each row's prompt and save the results as a JSON array.

    Each row produces one object in the array. It always holds
    ``sub_industry`` and ``timestamp`` (local time, ``%Y-%m-%d %H:%M:%S``),
    plus either ``data`` (the parsed JSON) or ``error`` (with ``raw_response``
    when the reply could not be parsed).

    Args:
        df: DataFrame with ``question_prompt`` and ``GICS Sub-Industry``
            columns. A ``gemini_response`` column is added in place.
        output_file: path of the JSON file to write.
        request_delay: seconds to sleep after each successful request.
        error_delay: seconds to sleep after a failed request.

    Returns:
        The same DataFrame object, with the ``gemini_response`` column added.
    """
    responses_array = []
    response_texts = []

    for _, row in df.iterrows():
        # Bind the label before the try block: the error handler reads it, and
        # inside the try it could be unbound, or left over from the previous
        # row, when an earlier statement raises.
        sub_industry = row['GICS Sub-Industry']
        try:
            prompt = row['question_prompt']

            print(f"\n{'='*50}")
            print(f"Processing {sub_industry}")

            # Send prompt to Gemini
            response = model.generate_content(prompt)
            response_text = response.text

            print('Response received')

            # Parse JSON from response
            parsed_json = parse_json_from_response(response_text)

            # The parser signals failure with None; a valid but empty JSON
            # value ({} or []) is still a successful parse.
            if parsed_json is not None:
                # Add metadata
                response_obj = {
                    "sub_industry": sub_industry,
                    "data": parsed_json,
                    "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
                }
                responses_array.append(response_obj)
                response_texts.append(json.dumps(parsed_json))
                print(f"Successfully parsed JSON for {sub_industry}")
            else:
                # Store error together with the start of the raw reply
                response_obj = {
                    "sub_industry": sub_industry,
                    "error": "Failed to parse JSON",
                    "raw_response": response_text[:500],
                    "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
                }
                responses_array.append(response_obj)
                response_texts.append(response_text)
                print(f"Failed to parse JSON for {sub_industry}")

            print(f"{'='*50}\n")

            time.sleep(request_delay)

        except Exception as e:
            print(f"Error processing row {sub_industry}: {str(e)}")

            response_obj = {
                "sub_industry": sub_industry,
                "error": str(e),
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
            }
            responses_array.append(response_obj)
            response_texts.append(f"Error: {str(e)}")

            time.sleep(error_delay)  # Longer wait on error

    # Write JSON array to file
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(responses_array, f, indent=2, ensure_ascii=False)

    print(f"\n✓ Valid JSON array file created: {output_file}")

    # Add responses to dataframe (in place, one entry per processed row)
    df['gemini_response'] = response_texts

    return df

# Example usage:
# Using the merged dictionary approach (all industries in one object).
# This sends one Gemini request per row of sp_analysis and writes the combined
# result to 'dcf.json'.
df_processed = process_dataframe_with_gemini(sp_analysis, 'dcf.json')

# Or using the array approach (each industry as separate object in array)
# df_processed = process_dataframe_with_gemini_array(sp_analysis, 'dcf_array.json')

# Persist the dataframe, which now carries the gemini_response column.
df_processed.to_csv('sp_analysis_with_responses.csv', index=False)
print("Processing complete. Check 'dcf.json' for properly formatted JSON.")


Processing Advertising
Response received
Successfully parsed JSON for Advertising


Processing Aerospace & Defense
Response received
Successfully parsed JSON for Aerospace & Defense


Processing Agricultural & Farm Machinery
Response received
Successfully parsed JSON for Agricultural & Farm Machinery


Processing Agricultural Products & Services
Response received
Successfully parsed JSON for Agricultural Products & Services


Processing Air Freight & Logistics
Response received
Successfully parsed JSON for Air Freight & Logistics


Processing Apparel Retail
Response received
Successfully parsed JSON for Apparel Retail


Processing Apparel, Accessories & Luxury Goods
Response received
Successfully parsed JSON for Apparel, Accessories & Luxury Goods


Processing Application Software
Response received
Successfully parsed JSON for Application Software


Processing Automobile Manufacturers
Response received
Successfully parsed JSON for Automobile Manufacturers


Processing Automotive Parts

import pandas as pd
import google.generativeai as genai
import time
import os

# Configure the Gemini client from the GEMINI_API_KEY environment variable.
# os.getenv returns None when the variable is unset, so a missing key is not
# detected on this line.
api_key = os.getenv('GEMINI_API_KEY')
genai.configure(api_key=api_key)
# Module-level model handle used by the processing function defined below.
model = genai.GenerativeModel('gemini-2.5-flash-preview-05-20')

def process_dataframe_with_gemini(df, output_file='gemini_responses.json'):
    """
    Send every row's ``question_prompt`` to Gemini and append the raw replies
    to ``output_file``.

    The file is emptied first. Each reply, or the ``"Error: ..."`` text when a
    row fails, is then appended followed by a blank line, so the file holds
    the replies back to back rather than one JSON document. The same texts are
    stored in a ``gemini_response`` column added to ``df`` in place, and ``df``
    is returned.
    """
    def append_entry(text):
        # One entry per row, separated from the next by an empty line.
        with open(output_file, 'a', encoding='utf-8') as sink:
            sink.write(text)
            sink.write('\n\n')

    # Start from an empty file so earlier runs do not leak into this one.
    with open(output_file, 'w', encoding='utf-8') as sink:
        sink.write("")

    collected = []
    divider = '=' * 50

    for _, record in df.iterrows():
        try:
            prompt_text = record['question_prompt']
            print(f"\n{divider}")
            print(f"Processing {record['GICS Sub-Industry']}")

            # Ask the model and echo the raw reply to the cell output.
            reply = model.generate_content(prompt_text).text
            print('works')
            print(reply)

            append_entry(reply)

            print(f"\nGemini Response saved to {output_file}")
            print(f"{divider}\n")

            collected.append(reply)
            time.sleep(1)

        except Exception as exc:
            failure = f"Error: {str(exc)}"
            print(f"Error processing row {record['GICS Sub-Industry']}: {str(exc)}")
            time.sleep(60)

            # Failures are logged to the file too, keeping one entry per row.
            append_entry(failure)
            collected.append(failure)

    # Mirror the collected texts onto the caller's dataframe.
    df['gemini_response'] = collected
    return df

# Example usage:
# Sends one Gemini request per row of sp_analysis and appends the raw replies to 'dcf.json'.
df_processed = process_dataframe_with_gemini(sp_analysis, 'dcf.json')
# Persist the dataframe, which now carries the gemini_response column.
df_processed.to_csv('sp_analysis_with_responses.csv', index=False)
# The completion message names the file that was actually written above.
print("Processing complete. Check 'dcf.json' for all responses.")

In [None]:
# Lift the column-width limit so long cell values are shown in full.
pd.set_option('display.max_colwidth', None)
# Preview the first 10 rows of the processed dataframe.
df_processed.head(10)


Unnamed: 0,frame,IncomeStatement - Revenues - TotalRevenues,"IncomeStatement - CostsAndExpenses - Food,Beverage&PackagingCosts(companyOperated)","IncomeStatement - CostsAndExpenses - Selling,GeneralAndAdministrativeExpense(sg&a)",IncomeStatement - CostsAndExpenses - DepreciationAndAmortization,IncomeStatement - CostsAndExpenses - TotalOperatingExpenses,IncomeStatement - OperatingIncome - OperatingIncome(loss),IncomeStatement - NonOperatingIncomeExpense - Income(loss)FromEquityMethodInvestments,IncomeStatement - NonOperatingIncomeExpense - OtherNonOperatingIncome(expense),IncomeStatement - IncomeBeforeTax - IncomeBeforeIncomeTaxes,...,Liabilities - UnrecognizedTaxBenefits(noncurrent),Liabilities - OtherNoncurrentLiabilities,Liabilities - TotalLiabilities,Equity - CommonStock,Equity - RetainedEarnings,Equity - AccumulatedOtherComprehensiveIncome(loss),Equity - NoncontrollingInterest(equitySection),Equity - Total Stockholders' Equity,Equity - Total Liabilities and Stockholders' Equity,gemini_response
0,CY2007,10435000000.0,,1293000000.0,542000000.0,9078000000.0,1357000000.0,-51000000.0,,,...,,,,,,,,1139000000.0,,Error: Empty prompt
1,CY2008,11304000000.0,,1342000000.0,556000000.0,9787000000.0,1517000000.0,-41000000.0,,1291000000.0,...,296000000.0,,6621000000.0,7000000.0,303000000.0,-418000000.0,14000000.0,-108000000.0,6527000000.0,Error: Empty prompt
2,CY2009,10836000000.0,,1221000000.0,580000000.0,9246000000.0,1590000000.0,36000000.0,,1396000000.0,...,301000000.0,,6034000000.0,253000000.0,996000000.0,-224000000.0,89000000.0,1025000000.0,7148000000.0,Error: Empty prompt
3,CY2010,11343000000.0,,1277000000.0,589000000.0,9574000000.0,1769000000.0,42000000.0,,1594000000.0,...,308000000.0,,6647000000.0,86000000.0,1717000000.0,-227000000.0,93000000.0,1576000000.0,8316000000.0,Error: Empty prompt
4,CY2011,12626000000.0,,1372000000.0,637000000.0,10811000000.0,1815000000.0,47000000.0,,1659000000.0,...,348000000.0,,6918000000.0,18000000.0,2052000000.0,-247000000.0,93000000.0,1823000000.0,8834000000.0,Error: Empty prompt
5,CY2012,13633000000.0,,1510000000.0,665000000.0,11339000000.0,2294000000.0,47000000.0,115000000.0,2145000000.0,...,309000000.0,,6701000000.0,0.0,2286000000.0,-132000000.0,99000000.0,2154000000.0,9013000000.0,Error: Empty prompt
6,CY2013,13084000000.0,,1412000000.0,721000000.0,11286000000.0,1798000000.0,26000000.0,16000000.0,1551000000.0,...,243000000.0,1244000000.0,6427000000.0,0.0,2102000000.0,64000000.0,63000000.0,2166000000.0,8695000000.0,Error: Empty prompt
7,CY2014,13279000000.0,,1419000000.0,328000000.0,11722000000.0,1517000000.0,30000000.0,41000000.0,1374000000.0,...,115000000.0,1244000000.0,6721000000.0,0.0,1737000000.0,-190000000.0,57000000.0,1547000000.0,8334000000.0,Error: Empty prompt
8,CY2015,3951000000.0,,1504000000.0,26000000.0,,441000000.0,41000000.0,,1787000000.0,...,98000000.0,958000000.0,7086000000.0,0.0,1150000000.0,-239000000.0,58000000.0,911000000.0,8061000000.0,Error: Empty prompt
9,CY2015,3951000000.0,,1504000000.0,26000000.0,,441000000.0,41000000.0,,1787000000.0,...,,,,,,,,,,Error: Empty prompt


# Goal: a DCF model that works for any company
We start from a common framework (the figures from previous years, etc.) so that we can analyse them and work out which prediction models to use.

In [None]:
# Transposed preview of the first 10 rows of the DCF frame.
# NOTE(review): `dcf` must already be defined in the kernel; the recorded
# output of this cell is a NameError. Confirm the cell order.
dcf.head(10).T

NameError: name 'dcf' is not defined