## Step 1: Import Libraries

In [17]:
%run ../make_clean_names.py

In [18]:
import requests
import openbb
import polars as pl
from datetime import datetime, timedelta
from typing import List, Dict, Any
import re
from dotenv import load_dotenv
import os
import sys

# Load environment variables
load_dotenv()

# Get API key from environment variables
FMP_API_KEY = os.getenv('FMP_API_KEY')
if not FMP_API_KEY:
    raise ValueError("FMP_API_KEY not found in environment variables")

## Step 2: Import Symbols

In [None]:
def load_symbols(file_path: str) -> List[str]:
    """Load ticker symbols from a text file, one symbol per line.

    Leading/trailing whitespace is stripped from every line and lines that
    are empty after stripping are skipped.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The symbols in file order, or an empty list if the file could not
        be read or decoded (the error is printed rather than raised).
    """
    try:
        # 'utf-8-sig' reads plain ASCII/UTF-8 and also drops a leading BOM,
        # which would otherwise be glued to the first symbol.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            symbols = [line.strip() for line in f if line.strip()]
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError) or an invalid path string.
        print(f"Error loading symbols: {e}")
        return []

    print(f"Loaded {len(symbols)} symbols from {file_path}")
    return symbols

# Read the ticker list that drives the rest of the notebook.
symbols_file = '../tickers.txt'
symbols = load_symbols(symbols_file)

# Echo the outcome so an empty or unreadable ticker file is obvious.
if not symbols:
    print("No symbols loaded.")
else:
    print("Symbols:", symbols)

## Step 3: Extract Data from FMP into Polars

In [14]:
def fetch_company_profile(
    symbol: str, api_key: str, timeout: float = 30.0
) -> List[Dict[str, Any]]:
    """Fetch company profile data for one symbol from the FMP API.

    Args:
        symbol: Ticker symbol, interpolated into the request path.
        api_key: FMP API key, sent as the ``apikey`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The decoded JSON response body.
        NOTE(review): annotated as a list of dicts because the caller
        extends a list with the result; confirm against the FMP API docs.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
        requests.RequestException: On connection errors or timeouts.
    """
    url = f"https://financialmodelingprep.com/api/v3/profile/{symbol}"
    params = {"apikey": api_key}

    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()

def load_symbols(file_path: str) -> List[str]:
    """Load ticker symbols from a text file, one symbol per line.

    Leading/trailing whitespace is stripped and blank lines are skipped.

    NOTE(review): this redefines the ``load_symbols`` declared earlier in
    the notebook. Unlike that version, this one prints nothing and lets
    file errors propagate to the caller; consider keeping only one.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The symbols in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' reads plain ASCII/UTF-8 and also drops a leading BOM,
    # which would otherwise be glued to the first symbol.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        return [line.strip() for line in f if line.strip()]
    
# Fetch data for each symbol. A failure on one symbol is reported and
# skipped so the remaining symbols are still processed.
company_profiles = []
for symbol in symbols:
    try:
        profile = fetch_company_profile(symbol, FMP_API_KEY)
    except (requests.RequestException, ValueError) as e:
        # RequestException: network/HTTP failures. ValueError: body was not
        # valid JSON.
        print(f"Error fetching data for {symbol}: {e}")
        continue

    if not profile:  # Some symbols might return empty data
        continue
    if not isinstance(profile, list):
        # Extending with a dict would append its *keys* as rows and corrupt
        # the DataFrame, so report the unexpected payload and skip it.
        print(f"Error fetching data for {symbol}: unexpected response {profile!r}")
        continue
    company_profiles.extend(profile)

if not company_profiles:
    # Fail here with a clear message: the following cells require `df`, and
    # silently skipping would leave it undefined (or stale from a prior run).
    raise RuntimeError("No company profiles were fetched; cannot build DataFrame")

# Convert to Polars DataFrame
df = pl.DataFrame(company_profiles)

## Step 4: Clean Column Names

In [None]:
# Rebind `df` to the result of make_clean_names. That helper is not defined
# in this notebook; presumably it is provided by the
# `%run ../make_clean_names.py` cell at the top, so that cell must be
# executed first (TODO confirm).
df = make_clean_names(df)

## Step 5: Write Polars to Parquet

In [None]:
# Destination directory, relative to the notebook's working directory.
output_dir = "../../../data/finance"

# Make sure the destination exists before writing; on a fresh checkout the
# directory may be missing and the write would otherwise fail.
os.makedirs(output_dir, exist_ok=True)

# Write DataFrame to Parquet
df.write_parquet(f'{output_dir}/company_profile.parquet')

## Step 6: Read Parquet (Validate)

In [None]:
# Lazily scan the file just written and materialize only the first rows as
# a quick sanity check of the output.
(
    pl.scan_parquet(f'{output_dir}/company_profile.parquet')
    .head(5)
    .collect()
)