# Ejercicio

El ejercicio consiste en extraer datos de manera automatizada, realizarles las transformaciones necesarias y luego cargarlos en la base de datos ya creada. La idea es que, con una simple ejecución diaria de este código, nuestra base de datos de MySQL quede actualizada.

 - Carga la librería yfinance, ya que es la que contiene información diaria de los precios de la bolsa. 
 
 - Selecciona un ticker (código de empresa), por ejemplo 'MMM' y ejecuta este código para ver qué nos devuelve yfinance, analiza qué variables tenemos, cómo se llaman, qué formato tienen, qué parte de este dataframe nos interesaría, qué transformaciones necesitaría para encajar perfectamente en la tabla Stocks, que es la que vamos a actualizar diariamente
   ```
    dat = yf.Ticker('MMM')
    dat = dat.history(period='1d')
   ```
 - Crea una función extract(), que automatice esta llamada, una transform(), que ponga los formatos de las variables "en su sitio" y que ordene las variables tal y como las llamarás en la función definitiva load().



In [2]:
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Date, Float, Enum, ForeignKey

from sqlalchemy_utils import database_exists, create_database, drop_database

from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker

import pandas as pd
import datetime as datetime

import pymysql
import tqdm

#!pip install yfinance --upgrade --no-cache-dir
import yfinance as yf

C:\Users\bvazq\anaconda3\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll
C:\Users\bvazq\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


## Activamos la conexión con MySQL

Y definimos de nuevo las clases de las tablas, ya que las necesitaremos igualmente

In [3]:
# SQLAlchemy engine for the 'Stockify' database on the local MySQL server (PyMySQL driver).
# NOTE(review): the root password is embedded in this URL -- consider loading it from configuration.
engine = create_engine('mysql+pymysql://root:passwordMSQL@localhost:3306/Stockify')
# Declarative base class that the table models below inherit from.
Base = declarative_base()

In [4]:
class Company(Base):
    """ORM model mapped to the ``Company`` table, keyed by ``company_code``."""

    __tablename__ = 'Company'

    # Primary key; the Stocks and Transactions models point at it via foreign keys.
    company_code = Column(String(120), primary_key=True)

    # Descriptive attributes, all stored as strings of up to 120 characters.
    security = Column(String(120))
    sec = Column(String(120))
    gics_sector = Column(String(120))
    gics_sub_industry = Column(String(120))
    heads_location = Column(String(120))

    # The only Date-typed attribute of the table.
    start_date = Column(Date)

    cik = Column(String(120))
    founded = Column(String(120))

In [5]:
class Stocks(Base):
    """ORM model mapped to the ``Stocks`` table: one price record per company and date."""

    __tablename__ = 'Stocks'

    # Integer primary key of the price record.
    stock_id = Column(Integer, primary_key=True)

    # Company the record belongs to (foreign key to Company.company_code).
    company_code = Column(String(120), ForeignKey("Company.company_code"))

    date = Column(Date)

    # Price and volume figures, all stored as floats.
    max_price = Column(Float)
    min_price = Column(Float)
    volume = Column(Float)
    close = Column(Float)
    open = Column(Float)

In [5]:
class user(Base):
    """ORM model mapped to the ``User`` table.

    The lowercase class name is kept because ``load()`` instantiates it by
    this name.
    """

    __tablename__ = 'User'

    # Integer primary key; referenced by Transactions.user_id.
    user_id = Column(Integer, primary_key=True)

    user_name = Column(String(20))
    user_city = Column(String(120))

In [6]:
class transactions(Base):
    """ORM model mapped to the ``Transactions`` table.

    Each row links a user, a company and a stock record, together with a
    number of units. The lowercase class name is kept because ``load()``
    instantiates it by this name.
    """

    __tablename__ = 'Transactions'

    # Integer primary key of the transaction.
    trx_id = Column(Integer, primary_key=True)

    # Foreign keys to the User, Company and Stocks tables respectively.
    user_id = Column(Integer, ForeignKey("User.user_id"))
    company_code = Column(String(120), ForeignKey("Company.company_code"))
    stock_id = Column(Integer, ForeignKey("Stocks.stock_id"))

    units = Column(Integer)

## Definición de las funciones

Primero tenemos la lista `tickers`, con los códigos de las empresas que queremos actualizar a diario. A continuación, define las 3 funciones.

In [7]:
# Session factory bound to the engine.
# NOTE: this rebinds the name `Session` that was imported from sqlalchemy.orm above.
Session = sessionmaker(bind=engine)
# Module-level session that load() uses to add and commit records.
session = Session()

In [8]:
# Tickers present in the data we originally loaded from the CSV files. We iterate over them to fetch
# their prices as of today. Some of them no longer exist.
tickers = [
    "AAL", "AAP", "AAPL", "ABBV", "ABC", "ABMD", "ABT", "ACN", "ADBE", "ADI", "ADM", 
    "ADP", "ADSK", "AEE", "AEP", "AES", "AFL", "AIG", "AIZ", "AJG", "AKAM", "ALB", 
    "ALGN", "ALK", "ALL", "ALLE", "AMAT", "AMCR", "AMD", "AME", "AMGN", "AMP", "AMT", 
    "AMZN", "ANET", "ANSS", "ANTM", "AON", "AOS", "APA", "APD", "APH", "APTV", "ARE", 
    "ATO", "ATVI", "AVB", "AVGO", "AVY", "AWK", "AXP", "AZO", "BA", "BAC", "BAX", 
    "BBWI", "BBY", "BDX", "BEN", "BF.B", "BIIB", "BIO", "BK", "BKNG", "BKR", "BLK", 
    "BLL", "BMY", "BR", "BRK.B", "BRO", "BSX", "BWA", "BXP", "C", "CAG", "CAH", 
    "CARR", "CAT", "CB", "CBOE", "CBRE", "CCI", "CCL", "CDAY", "CDNS", "CDW", "CE", 
    "CEG", "CERN", "CF", "CFG", "CHD", "CHRW", "CHTR", "CI", "CINF", "CL", "CLX", 
    "CMA", "CMCSA", "CME", "CMG", "CMI", "CMS", "CNC", "CNP", "COF", "COO", "COP", 
    "COST", "CPB", "CPRT", "CRL", "CRM", "CSCO", "CSX", "CTAS", "CTLT", "CTRA", 
    "CTSH", "CTVA", "CTXS", "CVS", "CVX", "CZR", "D", "DAL", "DD", "DE", "DFS", 
    "DG", "DGX", "DHI", "DHR", "DIS", "DISCA", "DISCK", "DISH", "DLR", "DLTR", "DOV", 
    "DOW", "DPZ", "DRE", "DRI", "DTE", "DUK", "DVA", "DVN", "DXC", "DXCM", "EA", 
    "EBAY", "ECL", "ED", "EFX", "EIX", "EL", "EMN", "EMR", "ENPH", "EOG", "EPAM", 
    "EQIX", "EQR", "ES", "ESS", "ETN", "ETR", "ETSY", "EVRG", "EW", "EXC", "EXPD", 
    "EXPE", "EXR", "F", "FANG", "FAST", "FB", "FBHS", "FCX", "FDS", "FDX", "FE", 
    "FFIV", "FIS", "FISV", "FITB", "FLT", "FMC", "FOX", "FOXA", "FRC", "FRT", "FTNT", 
    "FTV", "GD", "GE", "GILD", "GIS", "GL", "GLW", "GM", "GNRC", "GOOG", "GOOGL", 
    "GPC", "GPN", "GRMN", "GS", "GWW", "HAL", "HAS", "HCA", "HD", "HES", "HIG", 
    "HII", "HLT", "HOLX", "HON", "HPE", "HPQ", "HRL", "HSIC", "HST", "HSY", "HUM", 
    "HWM", "IBM", "ICE", "IDXX", "IEX", "IFF", "ILMN", "INCY", "INTC", "INTU", "IP", 
    "IPG", "IPGP", "IQV", "IR", "IRM", "ISRG", "IT", "ITW", "IVZ", "J", "JBHT", 
    "JCI", "JKHY", "JNJ", "JNPR", "JPM", "K", "KEY", "KEYS", "KIM", "KLAC", "KMB", 
    "KMI", "KMX", "KO", "KR", "L", "LDOS", "LEN", "LH", "LHX", "LIN", "LKQ", "LLY", 
    "LMT", "LNC", "LNT", "LOW", "LRCX", "LUMN", "LUV", "LVS", "LW", "LYB", "LYV", 
    "MA", "MAA", "MAR", "MAS", "MCD", "MCHP", "MCK", "MCO", "MDLZ", "MDT", "MET", 
    "MGM", "MHK", "MKC", "MKTX", "MLM", "MMC", "MMM", "MNST", "MO", "MOH", "MOS", 
    "MPC", "MPWR", "MRK", "MRNA", "MRO", "MS", "MSCI", "MSFT", "MSI", "MTB", "MTCH", 
    "MTD", "MU", "NCLH", "NDAQ", "NDSN", "NEE", "NEM", "NFLX", "NI", "NKE", "NLOK", 
    "NLSN", "NOC", "NOW", "NRG", "NSC", "NTAP", "NTRS", "NUE", "NVDA", "NVR", "NWL", 
    "NWS", "NWSA", "NXPI", "O", "ODFL", "OGN", "OKE", "OMC", "ORCL", "ORLY", "OTIS", 
    "OXY", "PARA", "PAYC", "PAYX", "PBCT", "PCAR", "PEAK", "PEG", "PENN", "PEP", 
    "PFE", "PFG", "PG", "PGR", "PH", "PHM", "PKG", "PKI", "PLD", "PM", "PNC", "PNR", 
    "PNW", "POOL", "PPG", "PPL", "PRU", "PSA", "PSX", "PTC", "PVH", "PWR", "PXD", 
    "PYPL", "QCOM", "QRVO", "RCL", "RE", "REG", "REGN", "RF", "RHI", "RJF", "RL", 
    "RMD", "ROK", "ROL", "ROP", "ROST", "RSG", "RTX", "SBAC", "SBNY", "SBUX", "SCHW", 
    "SEDG", "SEE", "SHW", "SIVB", "SJM", "SLB", "SNA", "SNPS", "SO", "SPG", "SPGI", 
    "SRE", "STE", "STT", "STX", "STZ", "SWK", "SWKS", "SYF", "SYK", "SYY", "T", 
    "TAP", "TDG", "TDY", "TECH", "TEL", "TER", "TFC", "TFX", "TGT", "TJX", "TMO", 
    "TMUS", "TPR", "TRMB", "TROW", "TRV", "TSCO", "TSLA", "TSN", "TT", "TTWO", "TWTR", 
    "TXN", "TXT", "TYL", "UA", "UAA", "UAL", "UDR", "UHS", "ULTA", "UNH", "UNP", 
    "UPS", "URI", "USB", "V", "VFC", "VLO", "VMC", "VNO", "VRSK", "VRSN", "VRTX", 
    "VTR", "VTRS", "VZ", "WAB", "WAT", "WBA", "WDC", "WEC", "WELL", "WFC", "WM", 
    "WMT", "XOM", "XRAY", "XYL", "YUM", "ZBH", "ZBRA", "ZION", "ZTS"
]


In [None]:
def extract(ticker_list, period='1d'):
    """Download recent price history for every ticker and stack it into one frame.

    Args:
        ticker_list: Iterable of ticker symbols (company codes) to query.
        period: Look-back window forwarded to ``Ticker.history``
            (defaults to ``'1d'``).

    Returns:
        A DataFrame holding the rows yfinance returned for each ticker, with
        yfinance's own index and columns untouched, plus a ``company_code``
        column identifying the ticker each row belongs to. An empty DataFrame
        is returned when no ticker produced any data.
    """
    frames = []
    for t in ticker_list:
        # One request per company.
        hist = yf.Ticker(t).history(period=period)
        if hist.empty:
            # Nothing came back for this symbol (the ticker list contains
            # companies that no longer exist), so there is nothing to store.
            continue
        # Tag the rows with their ticker; the price frame itself does not
        # carry the symbol it was requested for.
        frames.append(hist.assign(company_code=t))

    if not frames:
        # pd.concat raises ValueError on an empty list.
        return pd.DataFrame([])

    # A single concat at the end avoids re-copying the accumulated frame on
    # every iteration.
    return pd.concat(frames)

In [27]:
def transform(df):
    """Reshape a raw price frame into the positional layout ``load()`` expects.

    ``load()`` reads Stocks rows by position (date at 1, open at 2, max at 3,
    min at 4, close at 5, volume at 6 and the company code last), so the
    output columns are, in order:
    ``row, date, open, max_price, min_price, close, volume, company_code``.

    Args:
        df: DataFrame whose index holds the timestamps and whose columns
            include ``Open``, ``High``, ``Low``, ``Close``, ``Volume`` and
            ``company_code``. Any other columns are dropped.
            The input is not modified.

    Returns:
        A new DataFrame with the columns listed above: ``date`` holds
        ``datetime.date`` objects and the price/volume columns are floats.
        For an empty input, an empty frame with those columns is returned.

    Raises:
        KeyError: if a non-empty input lacks one of the required columns.
    """
    numeric_columns = ['open', 'max_price', 'min_price', 'close', 'volume']
    stock_columns = ['date'] + numeric_columns[:3] + ['close', 'volume', 'company_code']

    if df.empty:
        # Keep the schema so callers can iterate over zero rows safely.
        return pd.DataFrame(columns=['row'] + stock_columns)

    df_transform = df.copy()

    # Move the timestamp index into a regular 'date' column.
    df_transform = df_transform.rename_axis('date').reset_index()

    # Rename the source columns to the attribute names of the Stocks model.
    df_transform = df_transform.rename(columns={
        'Open': 'open',
        'High': 'max_price',
        'Low': 'min_price',
        'Close': 'close',
        'Volume': 'volume',
    })

    # The Stocks.date column is a Date, so drop the time-of-day and timezone.
    df_transform['date'] = pd.to_datetime(df_transform['date']).dt.date

    # Every numeric Stocks column is declared as Float (volume included).
    df_transform = df_transform.astype({name: float for name in numeric_columns})

    # Keep only what the table stores, in the order load() indexes it, and
    # put a running row number in position 0 so the date lands at position 1.
    df_transform = df_transform[stock_columns]
    df_transform = df_transform.rename_axis('row').reset_index()

    return df_transform

In [28]:
def load(ticker_list, period, table):
    """Run the ETL end to end: extract, transform and insert rows through the ORM.

    Args:
        ticker_list: Ticker symbols forwarded to ``extract()``.
        period: History window forwarded to ``extract()``.
        table: Name of the destination table. ``'Company'``, ``'Stocks'`` and
            ``'User'`` select the matching model; any other value is treated
            as the Transactions table.

    Raises:
        Exception: whatever error interrupted the load is re-raised after the
            session has been rolled back, so no partial batch stays pending.
    """
    # Fetch the raw data and bring it into the positional layout read below.
    df_transform = transform(extract(ticker_list, period))

    try:
        for val in df_transform.values:
            if table == 'Company':
                rec = Company(
                    company_code=val[2],
                    security=val[3],
                    sec=val[4],
                    gics_sector=val[5],
                    gics_sub_industry=val[6],
                    heads_location=val[7],
                    # Dates are expected as day/month/year text.
                    start_date=datetime.datetime.strptime(str(val[8]), "%d/%m/%Y").date(),
                    cik=val[9],
                    founded=val[10],
                )
            elif table == 'Stocks':
                rec = Stocks(
                    # The company code is read from the last column.
                    company_code=val[-1],
                    date=val[1],
                    max_price=val[3],
                    min_price=val[4],
                    volume=val[6],
                    close=val[5],
                    open=val[2],
                )
            elif table == 'User':
                rec = user(
                    user_id=val[0],
                    user_name=val[1],
                    user_city=val[2],
                )
            else:
                rec = transactions(
                    trx_id=val[0],
                    user_id=val[1],
                    company_code=val[3],
                    stock_id=val[2],
                    units=val[4],
                )

            session.add(rec)

        # Persist the whole batch at once.
        session.commit()
    except Exception:
        # Leave the shared session usable and discard the half-finished batch
        # before surfacing the error to the caller.
        session.rollback()
        raise

## Insertamos los datos

Llama a `load()` para que se complete el proceso ETL.

Ten cuidado: si la acción no se completa (por ejemplo, porque se produce un error), habrá que hacer `rollback()` para revertir lo que quedó a medias.

In [None]:
# Daily refresh: request one day of prices for every ticker and load them into the Stocks table.
period = '1d'
table = 'Stocks'
load(tickers, period, table)
 