In [1]:
from google.cloud import storage
import pandas as pd
import io
import yfinance as yf
from datetime import datetime
import os
import time
import numpy as np

In [2]:
from google.cloud import storage
from google.oauth2 import service_account

In [3]:
import os

# Register the path of the Google Cloud credentials file in the process environment
os.environ.update({
    'GOOGLE_APPLICATION_CREDENTIALS': '/workspaces/financial_data_scraping/jaber-financial-b20f23e23588.json',
})

# Read the value back and print it so the cell output confirms what was set
print("GOOGLE_APPLICATION_CREDENTIALS:", os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'))


GOOGLE_APPLICATION_CREDENTIALS: /workspaces/financial_data_scraping/jaber-financial-b20f23e23588.json


In [4]:
from google.cloud import storage

def test_gcs_access(bucket_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    print(f"Bucket '{bucket_name}' exists: {bucket.exists()}")

# Check that the 'companies_details' bucket exists and is reachable with the configured credentials
test_gcs_access('companies_details')


Bucket 'companies_details' exists: True


In [6]:
# Location of the input parquet file in Google Cloud Storage:
# the bucket name and the object (blob) path inside that bucket.
bucket_name = 'companies_details'
source_blob_name = 'data/companies_details.parquet'

def download_from_gcs(bucket_name, source_blob_name):
    """Download one object from Google Cloud Storage and return its bytes.

    Args:
        bucket_name: Name of the bucket that holds the object.
        source_blob_name: Path of the object (blob) inside the bucket.

    Returns:
        The object's content as returned by ``download_as_bytes()``.
    """
    return (
        storage.Client()
        .bucket(bucket_name)
        .blob(source_blob_name)
        .download_as_bytes()
    )

# Pull the raw bytes of the companies file out of GCS
file_content = download_from_gcs(
    bucket_name=bucket_name,
    source_blob_name=source_blob_name,
)

# Wrap the bytes in an in-memory buffer and parse them as parquet into a pandas DataFrame
all_companies = pd.read_parquet(io.BytesIO(file_content))



In [8]:
# Function to fetch price targets
def fetch_price_targets(ticker, isin):
    """Fetch the current price and analyst price targets for one ticker.

    Reads the ``info`` mapping of ``yf.Ticker(ticker)`` and copies selected
    fields into a flat record. Fields missing from ``info`` come back as None.

    Args:
        ticker: Symbol passed to ``yf.Ticker`` and embedded in the analysis URL.
        isin: Identifier stored unchanged under the 'ISIN' key.

    Returns:
        A dict with the keys 'ISIN', 'Ticker', 'Current', 'Average', 'High',
        'Low', 'scraping_url_analysis' and 'scraping_timestamp' (in that
        order), or None if any exception occurred; the error is printed.
    """
    # Output key -> key looked up in the yfinance info mapping
    info_fields = (
        ('Current', 'currentPrice'),
        ('Average', 'targetMeanPrice'),
        ('High', 'targetHighPrice'),
        ('Low', 'targetLowPrice'),
    )
    try:
        info = yf.Ticker(ticker).info

        record = {'ISIN': isin, 'Ticker': ticker}
        for output_key, info_key in info_fields:
            record[output_key] = info.get(info_key)
        record['scraping_url_analysis'] = f"https://finance.yahoo.com/quote/{ticker}/analysis/"
        record['scraping_timestamp'] = datetime.now().isoformat()
        return record
    except Exception as exc:
        # Best effort: report the failure and signal "no data" to the caller
        print(f"Error fetching data for {ticker}: {exc}")
        return None

# [Ticker, ISIN] pairs for every row of the companies DataFrame
tickers = all_companies[['Ticker', 'ISIN']].values.tolist()

# Collect the per-ticker records in their own list so that
# `analyst_price_target` is only ever bound to the final DataFrame
# (it used to be a list first and a DataFrame afterwards).
price_target_records = []
for position, (ticker, isin) in enumerate(tickers):
    # Random 1 to 3 second pause between API calls. It is skipped before the
    # first request, so no time is spent sleeping after the last ticker.
    if position > 0:
        time.sleep(np.random.choice([1, 2, 3]))

    # The '.MC' suffix is appended to every ticker before it is queried
    price_data = fetch_price_targets(f"{ticker}.MC", isin)
    if price_data:
        price_target_records.append(price_data)

# Convert the results into a DataFrame
analyst_price_target = pd.DataFrame(price_target_records)

# Function to upload DataFrame to GCS
def upload_to_gcs(bucket_name, df, destination_blob_name):
    """Serialise a DataFrame to parquet in memory and upload it to GCS.

    Args:
        bucket_name: Name of the destination bucket.
        df: Object exposing ``to_parquet``; written without its index.
        destination_blob_name: Path of the object (blob) inside the bucket.

    Returns:
        None. A confirmation message is printed after the upload call.
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)

    # Write the parquet bytes into memory, then rewind so the upload reads from the start
    parquet_buffer = io.BytesIO()
    df.to_parquet(parquet_buffer, index=False)
    parquet_buffer.seek(0)

    target_blob.upload_from_file(parquet_buffer, content_type='application/octet-stream')
    print(f"Data uploaded to {destination_blob_name} in bucket {bucket_name}.")

# Destination in Google Cloud Storage: bucket name and object (blob) path
bucket_name = 'analyst_price_target'
destination_blob_name = 'data/analyst_price_target.parquet'

# Push the price-target DataFrame to that destination
upload_to_gcs(
    bucket_name=bucket_name,
    df=analyst_price_target,
    destination_blob_name=destination_blob_name,
)


Data uploaded to data/analyst_price_target.parquet in bucket analyst_price_target.


In [10]:
# Function to fetch price targets
def fetch_price_targets(ticker, isin):
    """Fetch the current price and analyst price targets for one ticker.

    Reads the ``info`` mapping of ``yf.Ticker(ticker)`` and copies selected
    fields into a flat record. Fields missing from ``info`` come back as None.

    Args:
        ticker: Symbol passed to ``yf.Ticker`` and embedded in the analysis URL.
        isin: Identifier stored unchanged under the 'ISIN' key.

    Returns:
        A dict with the keys 'ISIN', 'Ticker', 'Current', 'Average', 'High',
        'Low', 'scraping_url_analysis' and 'scraping_timestamp' (in that
        order), or None if any exception occurred; the error is printed.
    """
    # Output key -> key looked up in the yfinance info mapping
    info_fields = (
        ('Current', 'currentPrice'),
        ('Average', 'targetMeanPrice'),
        ('High', 'targetHighPrice'),
        ('Low', 'targetLowPrice'),
    )
    try:
        info = yf.Ticker(ticker).info

        record = {'ISIN': isin, 'Ticker': ticker}
        for output_key, info_key in info_fields:
            record[output_key] = info.get(info_key)
        record['scraping_url_analysis'] = f"https://finance.yahoo.com/quote/{ticker}/analysis/"
        record['scraping_timestamp'] = datetime.now().isoformat()
        return record
    except Exception as exc:
        # Best effort: report the failure and signal "no data" to the caller
        print(f"Error fetching data for {ticker}: {exc}")
        return None

# Function to upload DataFrame to GCS
def upload_to_gcs(bucket_name, df, destination_blob_name):
    """Serialise a DataFrame to parquet in memory and upload it to GCS.

    Args:
        bucket_name: Name of the destination bucket.
        df: Object exposing ``to_parquet``; written without its index.
        destination_blob_name: Path of the object (blob) inside the bucket.

    Returns:
        None. A confirmation message is printed after the upload call.
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)

    # Write the parquet bytes into memory, then rewind so the upload reads from the start
    parquet_buffer = io.BytesIO()
    df.to_parquet(parquet_buffer, index=False)
    parquet_buffer.seek(0)

    target_blob.upload_from_file(parquet_buffer, content_type='application/octet-stream')
    print(f"Data uploaded to {destination_blob_name} in bucket {bucket_name}.")

# Function to save DataFrame locally
def save_locally(df, local_path):
    """Write a DataFrame to a local parquet file and print a confirmation.

    Args:
        df: Object exposing ``to_parquet``; written without its index.
        local_path: Destination path handed to ``to_parquet``.

    Returns:
        None.
    """
    df.to_parquet(local_path, index=False)
    confirmation = f"Data saved locally to {local_path}."
    print(confirmation)

# [Ticker, ISIN] pairs for every row of the companies DataFrame
tickers = all_companies[['Ticker', 'ISIN']].values.tolist()

# Collect the per-ticker records in their own list so that
# `analyst_price_target` is only ever bound to the final DataFrame
# (it used to be a list first and a DataFrame afterwards).
price_target_records = []
for position, (ticker, isin) in enumerate(tickers):
    # Random 1 to 3 second pause between API calls. It is skipped before the
    # first request, so no time is spent sleeping after the last ticker.
    if position > 0:
        time.sleep(np.random.choice([1, 2, 3]))

    # The '.MC' suffix is appended to every ticker before it is queried
    price_data = fetch_price_targets(f"{ticker}.MC", isin)
    if price_data:
        price_target_records.append(price_data)

# Convert the results into a DataFrame
analyst_price_target = pd.DataFrame(price_target_records)

# Define Google Cloud Storage bucket name and file path
bucket_name = 'analyst_price_target'
destination_blob_name = 'data/analyst_price_target.parquet'

# Define local file path
local_file_path = '/workspaces/financial_data_scraping/data/analyst_price_target.parquet'

# Ensure the local directory exists. exist_ok=True creates it when missing and
# is a no-op when it is already there, replacing the separate
# exists-check followed by makedirs.
local_dir = os.path.dirname(local_file_path)
os.makedirs(local_dir, exist_ok=True)

# Save DataFrame locally
save_locally(analyst_price_target, local_file_path)

# Upload to GCS
upload_to_gcs(bucket_name, analyst_price_target, destination_blob_name)

Data saved locally to /workspaces/financial_data_scraping/data/analyst_price_target.parquet.
Data uploaded to data/analyst_price_target.parquet in bucket analyst_price_target.
