# In [37]:
import requests
import pandas as pd
from datetime import datetime
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import numpy as np
from dotenv import load_dotenv
import os

# API keys
#
# load_dotenv() runs first so that the lookups below can also see values
# supplied through a local .env file. Each constant is None when the
# corresponding environment variable is not set.
load_dotenv()

FMP_API_KEY = os.environ.get('FMP_API_KEY')
FRED_API_KEY = os.environ.get('FRED_API_KEY')
NEWS_API_KEY = os.environ.get('NEWS_API_KEY')
ALPHA_VANTAGE_API_KEY = os.environ.get('ALPHA_VANTAGE_API_KEY')
# ----------------------------
# Data Fetching Functions
# ----------------------------
def get_financial_data(ticker, timeout=10):
    """Fetch the latest quote for ``ticker`` from the FMP ``quote`` endpoint.

    The quote's ``volume`` field is returned as ``volume_financial`` so it does
    not collide with the ``volume_stock`` key produced by ``get_stock_data``
    when the two dicts are merged.

    Args:
        ticker: Symbol to look up, e.g. ``'AAPL'``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        dict with the keys ``ticker``, ``price``, ``market_cap``, ``pe_ratio``,
        ``volume_financial`` and ``timestamp`` (local time of the fetch).

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the response does not contain a quote for ``ticker``.
        KeyError: If the quote lacks one of the expected fields.
    """
    url = f'https://financialmodelingprep.com/api/v3/quote/{ticker}'
    # Passing the key via ``params`` lets requests URL-encode it, and the
    # timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, params={'apikey': FMP_API_KEY}, timeout=timeout)
    # Fail with a clear HTTP error instead of an obscure KeyError further down
    # when the API answers with an error payload.
    response.raise_for_status()
    data = response.json()

    # The code below expects a non-empty list of quotes; anything else
    # (empty list, error object) means there is no quote to read.
    if not isinstance(data, list) or not data:
        raise ValueError(f'No quote data returned for {ticker}')

    quote = data[0]
    return {
        'ticker': ticker,
        'price': quote['price'],
        'market_cap': quote['marketCap'],
        'pe_ratio': quote['pe'],
        'volume_financial': quote['volume'],  # Renamed to avoid conflict
        'timestamp': datetime.now()
    }

def _latest_fred_value(series_id, timeout):
    """Return the ``value`` field of the newest observation of a FRED series."""
    response = requests.get(
        'https://api.stlouisfed.org/fred/series/observations',
        params={
            'series_id': series_id,
            'api_key': FRED_API_KEY,
            'file_type': 'json',
            # Newest first and a single row: avoids downloading the whole
            # history just to read its last element.
            'sort_order': 'desc',
            'limit': 1,
        },
        timeout=timeout,
    )
    response.raise_for_status()
    observations = response.json().get('observations', [])
    if not observations:
        raise ValueError(f'No observations returned for FRED series {series_id}')
    return observations[0]['value']


def get_macro_data(timeout=10):
    """Fetch the latest CPI and unemployment rate from the FRED API.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        dict with the keys ``CPI`` (series ``CPIAUCSL``) and
        ``unemployment_rate`` (series ``UNRATE``). The values are returned
        exactly as the API delivers them, without numeric conversion.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If a series comes back without observations.
    """
    return {
        'CPI': _latest_fred_value('CPIAUCSL', timeout),
        'unemployment_rate': _latest_fred_value('UNRATE', timeout)
    }

def get_stock_data(ticker, timeout=10):
    """Fetch the most recent daily bar for ``ticker`` from Alpha Vantage.

    The bar's volume is returned as ``volume_stock`` so it does not collide
    with the ``volume_financial`` key produced by ``get_financial_data``.

    Args:
        ticker: Symbol to look up, e.g. ``'AAPL'``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        dict with the keys ``ticker``, ``date``, ``open``, ``high``, ``low``,
        ``close``, ``volume_stock`` and ``timestamp``; price and volume values
        are passed through as delivered by the API. An empty dict is returned
        when the response carries no ``'Time Series (Daily)'`` data, so callers
        can merge the result unconditionally.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    response = requests.get(
        'https://www.alphavantage.co/query',
        # ``params`` URL-encodes the symbol and the key.
        params={
            'function': 'TIME_SERIES_DAILY',
            'symbol': ticker,
            'apikey': ALPHA_VANTAGE_API_KEY,
        },
        timeout=timeout,
    )
    data = response.json()

    series = data.get('Time Series (Daily)') if isinstance(data, dict) else None
    if not series:
        # Best effort: a missing (or empty) series yields no stock fields
        # rather than an exception.
        return {}

    # The keys are date strings; the greatest one is taken as the latest bar.
    latest_date = max(series)
    latest_data = series[latest_date]
    return {
        'ticker': ticker,
        'date': latest_date,
        'open': latest_data['1. open'],
        'high': latest_data['2. high'],
        'low': latest_data['3. low'],
        'close': latest_data['4. close'],
        'volume_stock': latest_data['5. volume'],  # Renamed
        'timestamp': datetime.now()
    }

def get_sentiment(ticker, timeout=10):
    """Return a placeholder sentiment score for ``ticker``.

    The score is simply the number of entries in the ``articles`` list of the
    NewsAPI ``everything`` response; no actual sentiment analysis is done.
    NOTE(review): the API presumably paginates, so this likely counts only the
    first page of results - confirm before relying on the magnitude.

    Args:
        ticker: Search term sent as the ``q`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        int: Number of articles in the response, ``0`` when the response has
        no ``articles`` key.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    response = requests.get(
        'https://newsapi.org/v2/everything',
        # ``params`` URL-encodes the search term and the key.
        params={'q': ticker, 'apiKey': NEWS_API_KEY},
        timeout=timeout,
    )
    news_data = response.json()
    return len(news_data.get('articles', []))  # Placeholder

# ----------------------------
# Data Processing Functions
# ----------------------------
def preprocess_data(df):
    """Clean and prepare data for modeling.

    Every known numeric column that is present in ``df`` is converted with
    ``pd.to_numeric(errors='coerce')`` (unparseable values become NaN) and its
    missing values are then replaced by the column mean. Columns that are not
    present are skipped, so a frame lacking some data source is still
    processed. A column with no valid value at all keeps its NaNs because its
    mean is NaN.

    Args:
        df: DataFrame of merged per-ticker records. It is modified in place.

    Returns:
        The same DataFrame, for chaining.
    """
    numeric_cols = [
        'price', 'market_cap', 'pe_ratio', 'volume_financial',
        'open', 'high', 'low', 'close', 'volume_stock',
        'CPI', 'unemployment_rate', 'sentiment'
    ]

    # Only touch columns that actually exist; indexing with a missing name
    # would raise KeyError.
    present_cols = [col for col in numeric_cols if col in df.columns]
    if not present_cols:
        return df

    # Convert numeric columns
    for col in present_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Fill missing values with the per-column mean
    df[present_cols] = df[present_cols].fillna(df[present_cols].mean())

    return df

def predict_deal_success(df):
    """Add a ``prediction`` column to ``df`` using a dummy model.

    The classifier is a ``RandomForestClassifier`` fitted on random data with
    random labels, so the predictions carry no information and differ between
    runs; the function only exercises the pipeline end to end.

    Args:
        df: DataFrame holding some or all of the known feature columns. It is
            modified in place.

    Returns:
        The same DataFrame with the ``prediction`` column added.

    Raises:
        ValueError: If ``df`` has rows but none of the known feature columns.
    """
    # Exclude non-numeric columns before scaling
    feature_columns = [
        'price', 'market_cap', 'pe_ratio', 'volume_financial',
        'open', 'high', 'low', 'close', 'volume_stock',
        'CPI', 'unemployment_rate', 'sentiment'
    ]

    # Nothing to score: return the frame with an empty prediction column
    # instead of failing inside the scaler.
    if len(df) == 0:
        df['prediction'] = pd.Series(dtype=int)
        return df

    # Filter only existing features
    valid_features = [col for col in feature_columns if col in df.columns]
    if not valid_features:
        raise ValueError('No known feature columns present; cannot predict')
    features = df[valid_features]

    # Dummy model setup
    model = RandomForestClassifier()
    model.fit(np.random.rand(10, len(valid_features)), np.random.randint(0, 2, 10))

    # Scale features
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)
    # Values that are still NaN after scaling (e.g. a column with no valid
    # entry at all) are set to 0 so the classifier does not receive NaN input.
    scaled_features = np.nan_to_num(scaled_features, nan=0.0)

    # Add predictions
    df['prediction'] = model.predict(scaled_features)
    return df

# ----------------------------
# Google Sheets Integration
# ----------------------------
def push_to_google_sheets(df, credentials_file='manda-credentials.json',
                          sheet_key='1ym4CpD5J6OCy_TnC8DEp4fN73yn5hSP7sKQ16_RQEdg'):
    """Push results to Google Sheets.

    Writes the column names followed by one row per DataFrame row to the first
    worksheet of the target spreadsheet.

    Args:
        df: DataFrame to upload. It is not modified.
        credentials_file: Path to the service-account JSON key file.
        sheet_key: Key of the spreadsheet to open.
    """
    def _to_cell(value):
        """Convert one DataFrame value into something JSON-serializable."""
        # Convert timestamps
        if isinstance(value, datetime):
            return str(value)
        # NaN is the only float that differs from itself; it cannot be sent
        # as JSON, so upload it as an empty cell.
        if isinstance(value, float) and value != value:
            return ''
        return value

    header = df.columns.values.tolist()
    rows = [
        [_to_cell(value) for value in row]
        for row in df.itertuples(index=False, name=None)
    ]

    # Auth and upload
    scope = ["https://spreadsheets.google.com/feeds",
             "https://www.googleapis.com/auth/drive"]
    creds = ServiceAccountCredentials.from_json_keyfile_name(credentials_file, scope)
    client = gspread.authorize(creds)

    sheet = client.open_by_key(sheet_key)
    worksheet = sheet.get_worksheet(0)
    worksheet.update([header] + rows)

# ----------------------------
# Main Workflow
# ----------------------------
def main(tickers):
    """Fetch data for ``tickers``, run the prediction and upload the result.

    Tickers whose data cannot be fetched are reported and skipped. If the
    macro data cannot be fetched, or no ticker yields data, the function
    reports that and returns without processing or uploading anything.

    Args:
        tickers: Iterable of ticker symbols.
    """
    # Macro data does not depend on the ticker, so fetch it once instead of
    # once per symbol.
    try:
        macro_data = get_macro_data()
    except Exception as e:
        print(f"Failed to fetch macro data: {str(e)}")
        return

    # Fetch and merge data
    all_data = []
    for ticker in tickers:
        try:
            data = {
                **get_financial_data(ticker),
                **get_stock_data(ticker),
                **macro_data,
                'sentiment': get_sentiment(ticker)
            }
        except Exception as e:
            print(f"Failed to fetch data for {ticker}: {str(e)}")
            continue
        all_data.append(data)

    if not all_data:
        print("No data fetched; nothing to process or upload.")
        return

    # Process and predict
    df = pd.DataFrame(all_data)
    df = preprocess_data(df)
    df = predict_deal_success(df)

    # Push results
    push_to_google_sheets(df)

if __name__ == '__main__':
    # Symbols processed when the file is run as a script.
    default_tickers = ['AAPL', 'GOOG', 'AMZN', 'MSFT', 'TSLA']
    main(default_tickers)