In [1]:
from google.cloud import storage
import pandas as pd
import io
import yfinance as yf
from datetime import datetime
import os
import time
import numpy as np

In [2]:
from google.cloud import storage
from google.oauth2 import service_account

In [3]:
import os

# Tell the Google Cloud client libraries where the credentials file lives
credentials_env_var = 'GOOGLE_APPLICATION_CREDENTIALS'
os.environ[credentials_env_var] = '/workspaces/financial_data_scraping/jaber-financial-b20f23e23588.json'

# Read the value back from the environment to confirm the assignment
print("GOOGLE_APPLICATION_CREDENTIALS:", os.environ.get(credentials_env_var))


GOOGLE_APPLICATION_CREDENTIALS: /workspaces/financial_data_scraping/jaber-financial-b20f23e23588.json


In [4]:
from google.cloud import storage

def test_gcs_access(bucket_name):
    """Print whether the GCS bucket called ``bucket_name`` exists.

    A storage client is created on every call, a handle to the bucket is
    obtained from it, and the result of the bucket's ``exists()`` check is
    written to stdout. Nothing is returned.
    """
    bucket_found = storage.Client().bucket(bucket_name).exists()
    print(f"Bucket '{bucket_name}' exists: {bucket_found}")

# Smoke-test access against the 'companies_details' bucket
test_gcs_access(bucket_name='companies_details')


Bucket 'companies_details' exists: True


In [5]:
# GCS location of the companies listing: object path first, then its bucket
source_blob_name = 'data/companies_details.parquet'
bucket_name = 'companies_details'

def download_from_gcs(bucket_name, source_blob_name):
    """Return the raw bytes of one object stored in Google Cloud Storage.

    Args:
        bucket_name: Name of the bucket that holds the object.
        source_blob_name: Path of the object (blob) inside the bucket.

    Returns:
        Whatever the blob's ``download_as_bytes()`` call returns.
    """
    blob_handle = storage.Client().bucket(bucket_name).blob(source_blob_name)
    return blob_handle.download_as_bytes()

# Pull the raw parquet bytes of the companies listing out of GCS
file_content = download_from_gcs(
    bucket_name=bucket_name,
    source_blob_name=source_blob_name,
)

# Parse the in-memory bytes into the companies DataFrame
parquet_stream = io.BytesIO(file_content)
all_companies = pd.read_parquet(parquet_stream)



In [6]:
def fetch_historical_data(ticker, exchange_suffix):
    """Download the maximum available price history for one ticker via yfinance.

    Args:
        ticker: Ticker symbol without the exchange suffix (e.g. ``'ANA'``).
        exchange_suffix: Suffix appended to the ticker to form the symbol that
            is passed to ``yf.download`` (e.g. ``'.MC'``).

    Returns:
        A DataFrame whose former index has been moved into a regular column
        and whose column labels are a flat (single-level) index. If anything
        in the download or post-processing raises, the error is printed and
        an empty DataFrame is returned instead, so callers can test ``.empty``.
    """
    try:
        ticker_with_suffix = ticker + exchange_suffix  # Symbol actually requested
        data = yf.download(ticker_with_suffix, period="max")

        # Newer yfinance releases can return two-level (field, ticker) column
        # labels even for a single symbol. Keep only the field level so that
        # later plain-string column assignment and parquet export keep working.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = list(data.columns.get_level_values(0))

        data = data.reset_index()  # Move the date index into a column
        return data
    except Exception as e:
        # Deliberate best-effort: one failing ticker must not abort the batch
        print(f"Error downloading data for {ticker}: {e}")
        return pd.DataFrame()

def upload_to_gcs(bucket_name, df, destination_blob_name):
    """Serialize ``df`` to parquet in memory and upload it to a GCS object.

    Args:
        bucket_name: Name of the destination bucket.
        df: Object exposing ``to_parquet(buffer, index=False)``; in this
            notebook a pandas DataFrame.
        destination_blob_name: Path of the object to write inside the bucket.

    Prints a confirmation line once the upload call has returned.
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)

    # Write the parquet bytes into an in-memory stream, then rewind it so the
    # upload reads from the first byte
    parquet_stream = io.BytesIO()
    df.to_parquet(parquet_stream, index=False)
    parquet_stream.seek(0)

    target_blob.upload_from_file(parquet_stream, content_type='application/octet-stream')
    print(f"Data uploaded to {destination_blob_name} in bucket {bucket_name}.")

def save_locally(df, local_path):
    """Write ``df`` as a parquet file at ``local_path`` without its index.

    Prints a confirmation line after the write call has returned.
    """
    df.to_parquet(local_path, index=False)
    confirmation = f"Data saved locally to {local_path}."
    print(confirmation)

# Destination bucket for the per-ticker history files.
# NOTE: this rebinds the notebook-level `bucket_name`, which an earlier cell
# set to 'companies_details'; re-run that cell before downloading the
# companies listing again.
bucket_name = 'historical_data_details'
scraping_url = "https://finance.yahoo.com/quote/{ticker}/history/"  # Template; {ticker} is filled per company
scraping_timestamp = datetime.now().isoformat()  # One naive local timestamp shared by the whole run
local_data_path = '/workspaces/financial_data_scraping/data/history/'

# Create the output directory (and any missing parents). exist_ok=True makes
# this safe to re-run and avoids the check-then-create race of a separate
# os.path.exists() test; it still raises if the path exists as a regular file.
os.makedirs(local_data_path, exist_ok=True)

# Download, annotate, and store the price history of every company listed in
# the all_companies DataFrame, one ticker at a time
for idx, row in all_companies.iterrows():
    ticker = row['Ticker']
    isin = row['ISIN']
    # Per-row exchange suffix; falls back to '.MC' when the column is absent
    exchange_suffix = row.get('Exchange_Suffix', '.MC')

    # Fetch historical data for the current ticker (empty DataFrame on failure)
    df = fetch_historical_data(ticker, exchange_suffix)

    if df.empty:
        # Make skipped tickers visible instead of passing over them silently
        print(f"No historical data retrieved for {ticker}; skipping")
    else:
        # Identification columns
        df['ISIN'] = isin
        df['Exchange'] = exchange_suffix.lstrip('.')  # Drop only the leading dot
        df['Ticker'] = ticker  # Ticker is stored without the suffix

        # Provenance columns. The URL is built from the full symbol
        # (ticker + suffix), i.e. the same symbol that was downloaded, so it
        # refers to the listing the data actually came from.
        df['scraping_url'] = scraping_url.format(ticker=f"{ticker}{exchange_suffix}")
        df['scraping_timestamp'] = scraping_timestamp

        # Identification columns first, provenance columns last, everything
        # else in its existing order in between
        leading_columns = ['Ticker', 'Exchange', 'ISIN']
        trailing_columns = ['scraping_url', 'scraping_timestamp']
        pinned_columns = leading_columns + trailing_columns
        columns_order = (
            leading_columns
            + [col for col in df.columns if col not in pinned_columns]
            + trailing_columns
        )
        df = df[columns_order]

        # Define local and GCS destination paths
        local_file_path = f"{local_data_path}{ticker}.parquet"
        gcs_blob_name = f"data/history/{ticker}.parquet"

        # Save DataFrame locally
        save_locally(df, local_file_path)

        # Upload DataFrame to GCS
        upload_to_gcs(bucket_name, df, gcs_blob_name)

        print(f"Processed historical data for {ticker}")

    # Random 1-3 second pause between download requests
    random_int = np.random.choice([1, 2, 3])
    time.sleep(random_int)

[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ANA.parquet.
Data uploaded to data/history/ANA.parquet in bucket historical_data_details.
Processed historical data for ANA


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ACX.parquet.
Data uploaded to data/history/ACX.parquet in bucket historical_data_details.
Processed historical data for ACX


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ACS.parquet.
Data uploaded to data/history/ACS.parquet in bucket historical_data_details.
Processed historical data for ACS


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ADZ.parquet.
Data uploaded to data/history/ADZ.parquet in bucket historical_data_details.
Processed historical data for ADZ


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/AEDAS.parquet.
Data uploaded to data/history/AEDAS.parquet in bucket historical_data_details.
Processed historical data for AEDAS


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/AENA.parquet.
Data uploaded to data/history/AENA.parquet in bucket historical_data_details.
Processed historical data for AENA


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/AIR.parquet.
Data uploaded to data/history/AIR.parquet in bucket historical_data_details.
Processed historical data for AIR


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/AI.parquet.
Data uploaded to data/history/AI.parquet in bucket historical_data_details.
Processed historical data for AI


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ALNT.parquet.
Data uploaded to data/history/ALNT.parquet in bucket historical_data_details.
Processed historical data for ALNT


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ALM.parquet.
Data uploaded to data/history/ALM.parquet in bucket historical_data_details.
Processed historical data for ALM


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/AMS.parquet.
Data uploaded to data/history/AMS.parquet in bucket historical_data_details.
Processed historical data for AMS


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/AMP.parquet.
Data uploaded to data/history/AMP.parquet in bucket historical_data_details.
Processed historical data for AMP


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/EAT.parquet.
Data uploaded to data/history/EAT.parquet in bucket historical_data_details.
Processed historical data for EAT


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/APAM.parquet.
Data uploaded to data/history/APAM.parquet in bucket historical_data_details.
Processed historical data for APAM


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/APPS.parquet.
Data uploaded to data/history/APPS.parquet in bucket historical_data_details.
Processed historical data for APPS


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/MTS.parquet.
Data uploaded to data/history/MTS.parquet in bucket historical_data_details.
Processed historical data for MTS


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ARM.parquet.
Data uploaded to data/history/ARM.parquet in bucket historical_data_details.
Processed historical data for ARM


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/A3M.parquet.
Data uploaded to data/history/A3M.parquet in bucket historical_data_details.
Processed historical data for A3M


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ATRY.parquet.
Data uploaded to data/history/ATRY.parquet in bucket historical_data_details.
Processed historical data for ATRY


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ADX.parquet.
Data uploaded to data/history/ADX.parquet in bucket historical_data_details.
Processed historical data for ADX


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/AZK.parquet.
Data uploaded to data/history/AZK.parquet in bucket historical_data_details.
Processed historical data for AZK


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/BBVA.parquet.
Data uploaded to data/history/BBVA.parquet in bucket historical_data_details.
Processed historical data for BBVA


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/SAB.parquet.
Data uploaded to data/history/SAB.parquet in bucket historical_data_details.
Processed historical data for SAB


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/SAN.parquet.
Data uploaded to data/history/SAN.parquet in bucket historical_data_details.
Processed historical data for SAN


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/BKT.parquet.
Data uploaded to data/history/BKT.parquet in bucket historical_data_details.
Processed historical data for BKT


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/BKY.parquet.
Data uploaded to data/history/BKY.parquet in bucket historical_data_details.
Processed historical data for BKY


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/RIO.parquet.
Data uploaded to data/history/RIO.parquet in bucket historical_data_details.
Processed historical data for RIO


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/BAIN.parquet.
Data uploaded to data/history/BAIN.parquet in bucket historical_data_details.
Processed historical data for BAIN


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CABK.parquet.
Data uploaded to data/history/CABK.parquet in bucket historical_data_details.
Processed historical data for CABK


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CAM.parquet.
Data uploaded to data/history/CAM.parquet in bucket historical_data_details.
Processed historical data for CAM


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CASH.parquet.
Data uploaded to data/history/CASH.parquet in bucket historical_data_details.
Processed historical data for CASH


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CLNX.parquet.
Data uploaded to data/history/CLNX.parquet in bucket historical_data_details.
Processed historical data for CLNX


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/LOG.parquet.
Data uploaded to data/history/LOG.parquet in bucket historical_data_details.
Processed historical data for LOG


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CEV.parquet.
Data uploaded to data/history/CEV.parquet in bucket historical_data_details.
Processed historical data for CEV


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CLEO.parquet.
Data uploaded to data/history/CLEO.parquet in bucket historical_data_details.
Processed historical data for CLEO


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CIE.parquet.
Data uploaded to data/history/CIE.parquet in bucket historical_data_details.
Processed historical data for CIE


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CBAV.parquet.
Data uploaded to data/history/CBAV.parquet in bucket historical_data_details.
Processed historical data for CBAV


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CCEP.parquet.
Data uploaded to data/history/CCEP.parquet in bucket historical_data_details.
Processed historical data for CCEP


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/CAF.parquet.
Data uploaded to data/history/CAF.parquet in bucket historical_data_details.
Processed historical data for CAF


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ANE.parquet.
Data uploaded to data/history/ANE.parquet in bucket historical_data_details.
Processed historical data for ANE


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ALB.parquet.
Data uploaded to data/history/ALB.parquet in bucket historical_data_details.
Processed historical data for ALB


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/OLE.parquet.
Data uploaded to data/history/OLE.parquet in bucket historical_data_details.
Processed historical data for OLE


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/DESA.parquet.
Data uploaded to data/history/DESA.parquet in bucket historical_data_details.
Processed historical data for DESA


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/DIA.parquet.
Data uploaded to data/history/DIA.parquet in bucket historical_data_details.
Processed historical data for DIA


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/MDF.parquet.
Data uploaded to data/history/MDF.parquet in bucket historical_data_details.
Processed historical data for MDF


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/EBRO.parquet.
Data uploaded to data/history/EBRO.parquet in bucket historical_data_details.
Processed historical data for EBRO


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ENER.parquet.
Data uploaded to data/history/ENER.parquet in bucket historical_data_details.
Processed historical data for ENER


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/EDR.parquet.
Data uploaded to data/history/EDR.parquet in bucket historical_data_details.
Processed historical data for EDR


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ENO.parquet.
Data uploaded to data/history/ENO.parquet in bucket historical_data_details.
Processed historical data for ENO


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/ENG.parquet.
Data uploaded to data/history/ENG.parquet in bucket historical_data_details.
Processed historical data for ENG


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/PUIG.parquet.
Data uploaded to data/history/PUIG.parquet in bucket historical_data_details.
Processed historical data for PUIG


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/RLIA.parquet.
Data uploaded to data/history/RLIA.parquet in bucket historical_data_details.
Processed historical data for RLIA


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/RED.parquet.
Data uploaded to data/history/RED.parquet in bucket historical_data_details.
Processed historical data for RED


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/R4.parquet.
Data uploaded to data/history/R4.parquet in bucket historical_data_details.
Processed historical data for R4


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/REN.parquet.
Data uploaded to data/history/REN.parquet in bucket historical_data_details.
Processed historical data for REN


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/REP.parquet.
Data uploaded to data/history/REP.parquet in bucket historical_data_details.
Processed historical data for REP


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/SCYR.parquet.
Data uploaded to data/history/SCYR.parquet in bucket historical_data_details.
Processed historical data for SCYR


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/SLR.parquet.
Data uploaded to data/history/SLR.parquet in bucket historical_data_details.
Processed historical data for SLR


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/SOL.parquet.
Data uploaded to data/history/SOL.parquet in bucket historical_data_details.
Processed historical data for SOL


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/SQRL.parquet.
Data uploaded to data/history/SQRL.parquet in bucket historical_data_details.
Processed historical data for SQRL


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/TLGO.parquet.
Data uploaded to data/history/TLGO.parquet in bucket historical_data_details.
Processed historical data for TLGO


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/TRE.parquet.
Data uploaded to data/history/TRE.parquet in bucket historical_data_details.
Processed historical data for TRE


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/TEF.parquet.
Data uploaded to data/history/TEF.parquet in bucket historical_data_details.
Processed historical data for TEF


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/TUB.parquet.
Data uploaded to data/history/TUB.parquet in bucket historical_data_details.
Processed historical data for TUB


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/TRG.parquet.
Data uploaded to data/history/TRG.parquet in bucket historical_data_details.
Processed historical data for TRG


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/UNI.parquet.
Data uploaded to data/history/UNI.parquet in bucket historical_data_details.
Processed historical data for UNI


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/UBS.parquet.
Data uploaded to data/history/UBS.parquet in bucket historical_data_details.
Processed historical data for UBS


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/VID.parquet.
Data uploaded to data/history/VID.parquet in bucket historical_data_details.
Processed historical data for VID


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/VIS.parquet.
Data uploaded to data/history/VIS.parquet in bucket historical_data_details.
Processed historical data for VIS


[*********************100%***********************]  1 of 1 completed


Data saved locally to /workspaces/financial_data_scraping/data/history/VOC.parquet.
Data uploaded to data/history/VOC.parquet in bucket historical_data_details.
Processed historical data for VOC
