In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
import requests
import math
import xlsxwriter
from scipy import stats
from typing import Optional
import os
from io import BytesIO
import pyarrow as pa
import pyarrow.parquet as pq

from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

from utils import (
    calculate_period_return,
    get_price_info,
    calculate_price_to_cash_flow,
    scrape_stock_measures,
)

from blob_utils import (
    get_blob_service_client_connection_string,
    download_parquet_blob_to_df,
    upload_to_blob,
    upload_to_blob_from_file,
)

In [17]:
# Location of the constituents CSV. Defaults to the original Codespaces path;
# set the STOCKS_CSV_PATH environment variable to run from another checkout.
STOCKS_CSV_PATH = os.environ.get(
    "STOCKS_CSV_PATH",
    "/workspaces/codespaces-jupyter/data/constituents_csv.csv",
)

stocks = pd.read_csv(STOCKS_CSV_PATH)

# scrape_stock_measures comes from utils; only the "stock_measures" entry of
# its result is used in this notebook.
stock_measure_df = scrape_stock_measures(stocks=stocks)["stock_measures"]

In [21]:
# Cut the scraped measures into a first and a second half by row position.
midpoint = len(stock_measure_df) // 2
to_upload = stock_measure_df.iloc[:midpoint]
to_upload_2 = stock_measure_df.iloc[midpoint:]

# Serialise the first half to Parquet entirely in memory:
# pandas frame -> Arrow table -> Arrow output stream -> raw bytes.
table = pa.Table.from_pandas(to_upload)
buffer = pa.BufferOutputStream()
pq.write_table(table, buffer)
parquet_data = buffer.getvalue().to_pybytes()

In [20]:
# Plain-text dump of the frame staged for upload (pandas elides the middle rows).
print(to_upload)

    ticker current_price trailing_pe trailing_earnings_per_share  \
0      MMM        100.01   10.331612                        9.68   
1      AOS         68.92    43.34591                        1.59   
2      ABT        103.71   31.522797                        3.29   
3     ABBV        150.14   35.327057                        4.25   
4      ACN        302.94   27.024086                       11.21   
..     ...           ...         ...                         ...   
185    IEX        219.42   28.422281                        7.72   
186   IDXX        483.73    58.35103                        8.29   
187    ITW        235.78   23.601603                        9.99   
188   INCY         64.04    44.78322                        1.43   
189     IR         66.49   42.082275                        1.58   

    free_cash_flow cash_per_share price_to_cash_flow return_on_equity  \
0      -2560000000          7.815                0.0         -0.13371   
1        402049984          2.722    

In [29]:
# Read the storage connection string from the environment so the secret never
# lives in the notebook. Do not print it: cell output is saved with the file.
azure_storage_connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not azure_storage_connection_string:
    # Fail here with an actionable message instead of handing None to the helper.
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; "
        "export it before running this cell."
    )

# Build the client used by the blob helpers (both imported from blob_utils).
blob_service_client = get_blob_service_client_connection_string(
    azure_storage_connection_string=azure_storage_connection_string,
)

# NOTE(review): presumably returns the blob's contents as a DataFrame —
# confirm against blob_utils.download_parquet_blob_to_df.
downloaded_file = download_parquet_blob_to_df(
    blob_service_client=blob_service_client,
    container_name="first-stock-blob",
    blob_name="stock_measures.parquet",
)


# to_upload = pd.concat([downloaded_file, stock_measure_df], axis=0)

# print(to_upload)

# table = pa.Table.from_pandas(to_upload)

# # Create an in-memory Arrow output stream to hold the Parquet data
# buffer = pa.BufferOutputStream()

# # Write the Table to the buffer in Parquet format
# pq.write_table(table, buffer)

# # Get the buffer's content
# parquet_data = buffer.getvalue().to_pybytes()

# upload_to_blob(blob_service_client=blob_service_client,
#     container_name="first-stock-blob",
#     filename="stock_measures.parquet",
#     data=parquet_data
#     )



    ticker  current_price  trailing_pe  trailing_earnings_per_share  \
0      MMM         100.01    10.331612                         9.68   
1      AOS          68.92    43.345910                         1.59   
2      ABT         103.71    31.522797                         3.29   
3     ABBV         150.14    35.327057                         4.25   
4      ACN         302.94    27.024086                        11.21   
..     ...            ...          ...                          ...   
375    XEL          58.19    18.015480                         3.23   
376    XYL          99.46    48.517075                         2.05   
377   ZBRA         271.98    34.691326                         7.84   
378    ZBH         119.18    55.691586                         2.14   
379    ZTS         181.57    41.172340                         4.41   

     free_cash_flow  cash_per_share  price_to_cash_flow  return_on_equity  \
0       -2560000000           7.815        1.680723e-08          -0.13

In [5]:

# Presumably uploads ./stock_measures.parquet to the "first-stock-blob" container.
# NOTE(review): `upload_blob_file` is not among the names imported from
# blob_utils in the visible import cell (which brings in `upload_to_blob` and
# `upload_to_blob_from_file`). Unless it is defined elsewhere, this cell raises
# NameError on a fresh kernel — confirm which helper is intended.
upload_blob_file(
    blob_service_client=blob_service_client,
    container_name="first-stock-blob",
    filepath="./",
    filename="stock_measures.parquet",
)

In [27]:


# Rebinds `downloaded_file` to whatever this helper returns for the
# "stock_measures.parquet" blob in the "first-stock-blob" container.
# NOTE(review): `download_blob_to_file` is not among the names imported from
# blob_utils in the visible import cell (which brings in
# `download_parquet_blob_to_df`). Unless it is defined elsewhere, this cell
# raises NameError on a fresh kernel — confirm which helper is intended.
downloaded_file = download_blob_to_file(
    blob_service_client=blob_service_client,
    container_name="first-stock-blob",
    filepath="./",
    filename="stock_measures.parquet",
    blob_name="stock_measures.parquet",
)

In [28]:
# Plain-text dump of the downloaded data to eyeball the round trip.
print(downloaded_file)

    ticker  current_price  trailing_pe  trailing_earnings_per_share  \
0      MMM        99.8100    10.310950                         9.68   
1      AOS        68.9800    43.383648                         1.59   
2      ABT       103.4700    31.449848                         3.29   
3     ABBV       150.2500    35.352940                         4.25   
4      ACN       302.1800    26.956287                        11.21   
..     ...            ...          ...                          ...   
374    XEL        58.1401    18.000030                         3.23   
375    XYL        99.8100    48.687805                         2.05   
376   ZBRA       273.3100    34.860970                         7.84   
377    ZBH       118.8300    55.528034                         2.14   
378    ZTS       180.1700    40.854877                         4.41   

     free_cash_flow  cash_per_share  price_to_cash_flow  return_on_equity  
0       -2560000000           7.815        1.680616e-08          -0.133

In [25]:
# Split the downloaded frame at its midpoint, then stitch the two halves back
# together row-wise and show the result.
half = len(downloaded_file) // 2
downloaded_file_part_1 = downloaded_file.iloc[:half]
downloaded_file_part_2 = downloaded_file.iloc[half:]

parts = [downloaded_file_part_1, downloaded_file_part_2]
to_upload = pd.concat(parts, axis=0)
print(to_upload)

    ticker  current_price  trailing_pe  trailing_earnings_per_share  \
0      MMM        99.8100    10.310950                         9.68   
1      AOS        68.9800    43.383648                         1.59   
2      ABT       103.4700    31.449848                         3.29   
3     ABBV       150.2500    35.352940                         4.25   
4      ACN       302.1800    26.956287                        11.21   
..     ...            ...          ...                          ...   
374    XEL        58.1401    18.000030                         3.23   
375    XYL        99.8100    48.687805                         2.05   
376   ZBRA       273.3100    34.860970                         7.84   
377    ZBH       118.8300    55.528034                         2.14   
378    ZTS       180.1700    40.854877                         4.41   

     free_cash_flow  cash_per_share  price_to_cash_flow  return_on_equity  
0       -2560000000           7.815        1.680616e-08          -0.133