In [1]:
#pip install mysql-connector-python

In [2]:
import pandas as pd
import mysql
import mysql.connector
import numpy as np

In [3]:
# Debugging aid: list the attributes exposed by the top-level `mysql` package.
dir(mysql)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'connector']

CREAR BASE DE DATOS

In [5]:
# Update user/password before running.
database = "yfinance_stocks"

def create_database(database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Create the MySQL database ``database`` if it does not exist yet.

    Connects to the server without selecting a schema, issues
    ``CREATE DATABASE IF NOT EXISTS`` and always releases the cursor and the
    connection, even when the statement fails.

    Args:
        database: Name of the database to create. It is interpolated into the
            statement as-is, so it must be a trusted, valid identifier.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential; prefer
            passing it explicitly (e.g. read from an environment variable).

    Returns:
        None.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password)
    try:
        cursor = db.cursor()
        try:
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS {database};")
        finally:
            cursor.close()
    finally:
        # Close the connection on every path so repeated runs do not leak sessions.
        db.close()

# Use the `database` name configured at the top of this cell instead of repeating the literal.
create_database(database)

In [6]:
# Update user/password before running.
database = "yfinance_stocks"

# DDL statements, executed in list order. The order matters: `stocks` and
# `holder_location` must exist before the tables whose FOREIGN KEYs reference them.
create_table_statements = [
    # stocks table
    '''
    CREATE TABLE IF NOT EXISTS stocks (
        ticker VARCHAR(10) PRIMARY KEY,
        name VARCHAR(255),
        sector VARCHAR(255),
        industry VARCHAR(255),
        market_cap BIGINT,
        full_time_employees INT,
        ipo_date DATE,
        extraction_timestamp TIMESTAMP
    )
''',

    # historical_prices table
    '''
    CREATE TABLE IF NOT EXISTS historical_prices (
        date DATE,
        open DECIMAL(15, 4),
        high DECIMAL(15, 4),
        low DECIMAL(15, 4),
        close DECIMAL(15, 4),
        adj_close DECIMAL(15, 4),
        volume BIGINT,
        ticker VARCHAR(10),
        extraction_timestamp TIMESTAMP,
        PRIMARY KEY (date, ticker),
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
    )
''',

    # dividends table
    '''
    CREATE TABLE IF NOT EXISTS dividends (
        date DATE,
        dividends DECIMAL(10, 4),
        ticker VARCHAR(10),
        extraction_timestamp TIMESTAMP,
        PRIMARY KEY (date, ticker),
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
        )
''',

    # splits table
    '''
    CREATE TABLE IF NOT EXISTS splits (
        date DATE,
        stock_splits DECIMAL(10, 4),
        ticker VARCHAR(10),
        extraction_timestamp TIMESTAMP,
        PRIMARY KEY (date, ticker),
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
    )
''',

    # balance_sheet table
    # Liabilities (total liabilities) are the combined debts and obligations that a
    # person or company owes to third parties. Everything the company owns is
    # classified as an asset, and all amounts it owes for future obligations are
    # recorded as liabilities. On the balance sheet, total assets minus total
    # liabilities equals equity.
    '''
    CREATE TABLE IF NOT EXISTS balance_sheet (
        date DATE,
        ticker VARCHAR(10),
        ordinary_shares_number BIGINT,
        net_debt BIGINT,
        total_assets BIGINT,
        current_liabilities BIGINT,
        stockholders_equity BIGINT,
        extraction_timestamp TIMESTAMP,
        PRIMARY KEY (date, ticker),
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
        )
''',

    # income_statement table
    # EBITDA is a financial indicator showing a company's earnings before
    # subtracting payments and costs such as taxes, interest, depreciation and
    # amortization.
    # A high diluted EPS means the company generates high earnings per share; it is
    # used to compare a company's EPS with that of its competitors.
    # Operating income is synonymous with operating profit; it allows analysing a
    # company's operating performance with interest and taxes removed.
    '''
    CREATE TABLE IF NOT EXISTS income_statement (
        date DATE,
        ticker VARCHAR(10),
        ebitda BIGINT,
        total_revenue BIGINT,
        net_income BIGINT,
        operating_income BIGINT,
        diluted_eps DECIMAL(10, 4),
        extraction_timestamp TIMESTAMP,
        PRIMARY KEY (date, ticker),
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
    )
''',

    # cashflow table
    # Capital expenditures are the investments a company makes to acquire, improve
    # or maintain long-term assets such as buildings, land, machinery or equipment.
    # Operating cash flow measures the cash generated by a company's business
    # operations.
    # Free cash flow is the cash a company generates from its business operations
    # after subtracting capital expenditures.
    '''
    CREATE TABLE IF NOT EXISTS cashflow (
        date DATE,
        ticker VARCHAR(10),
        operating_cash_flow BIGINT,
        capital_expenditure BIGINT,
        free_cash_flow BIGINT,
        extraction_timestamp TIMESTAMP,
        PRIMARY KEY (date, ticker),
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
    )
''',

    # upgrades_downgrades table
    '''
    CREATE TABLE IF NOT EXISTS upgrades_downgrades (
        id INT AUTO_INCREMENT PRIMARY KEY,
        date DATETIME,
        firm VARCHAR(255),
        to_grade VARCHAR(50),
        from_grade VARCHAR(50),
        action VARCHAR(50),
        ticker VARCHAR(10),
        extraction_timestamp TIMESTAMP,
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
    )
''',

    # holder_location table (must be created before `holders`, which references it)
    '''
    CREATE TABLE IF NOT EXISTS holder_location (
        holder VARCHAR(255) PRIMARY KEY,
        latitude DECIMAL(9, 6),
        longitude DECIMAL(9, 6),
        headquarters VARCHAR(255)
    )
''',

    # holders table
    '''
    CREATE TABLE IF NOT EXISTS holders (
        id INT AUTO_INCREMENT PRIMARY KEY,
        date DATE,
        holder VARCHAR(255),
        pct_held DECIMAL(5, 2),
        shares BIGINT,
        value BIGINT,
        ticker VARCHAR(10),
        extraction_timestamp TIMESTAMP,
        FOREIGN KEY (holder) REFERENCES holder_location (holder),
        FOREIGN KEY (ticker) REFERENCES stocks (ticker)
    )
''',
]

db = mysql.connector.connect(host     = "localhost",
                             user     = "root",
                             password = "Hackaboss_2024",
                             database = database)
try:
    cursor = db.cursor()
    try:
        for statement in create_table_statements:
            cursor.execute(statement)
        db.commit()
    finally:
        cursor.close()
finally:
    # Release the connection even if one of the CREATE TABLE statements fails.
    db.close()

RELLENAR BASE DE DATOS

In [8]:
# Input for the `stocks` table load.

# index_col=0 turns the CSV's first column into the DataFrame index, so it is
# left out of `.values`; NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("stocks_20241028.csv", index_col=0)
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "stocks"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('ticker', 'name', 'sector', 'industry', 'market_cap', 'full_time_employees', 'ipo_date', 'extraction_timestamp')
INSERT INTO stocks (ticker, name, sector, industry, market_cap, full_time_employees, ipo_date, extraction_timestamp) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
Añadidas: 57 filas


In [9]:
# Input for the `historical_prices` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("historical_prices_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "historical_prices"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'open', 'high', 'low', 'close', 'adj_close', 'volume', 'ticker', 'extraction_timestamp')
INSERT INTO historical_prices (date, open, high, low, close, adj_close, volume, ticker, extraction_timestamp) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
Añadidas: 40844 filas


In [10]:
# Input for the `dividends` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("dividends_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "dividends"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'dividends', 'ticker', 'extraction_timestamp')
INSERT INTO dividends (date, dividends, ticker, extraction_timestamp) VALUES (%s, %s, %s, %s)
Añadidas: 333 filas


In [11]:
# Input for the `splits` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("splits_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "splits"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'stock_splits', 'ticker', 'extraction_timestamp')
INSERT INTO splits (date, stock_splits, ticker, extraction_timestamp) VALUES (%s, %s, %s, %s)
Añadidas: 31 filas


In [12]:
# Input for the `balance_sheet` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("balance_sheet_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "balance_sheet"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'ticker', 'ordinary_shares_number', 'net_debt', 'total_assets', 'current_liabilities', 'stockholders_equity', 'extraction_timestamp')
INSERT INTO balance_sheet (date, ticker, ordinary_shares_number, net_debt, total_assets, current_liabilities, stockholders_equity, extraction_timestamp) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
Añadidas: 23 filas


In [13]:
# Input for the `income_statement` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("income_statement_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "income_statement"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'ticker', 'ebitda', 'total_revenue', 'net_income', 'operating_income', 'diluted_eps', 'extraction_timestamp')
INSERT INTO income_statement (date, ticker, ebitda, total_revenue, net_income, operating_income, diluted_eps, extraction_timestamp) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
Añadidas: 22 filas


In [14]:
# Input for the `cashflow` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("cashflow_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "cashflow"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'ticker', 'operating_cash_flow', 'capital_expenditure', 'free_cash_flow', 'extraction_timestamp')
INSERT INTO cashflow (date, ticker, operating_cash_flow, capital_expenditure, free_cash_flow, extraction_timestamp) VALUES (%s, %s, %s, %s, %s, %s)
Añadidas: 24 filas


In [15]:
# Input for the `upgrades_downgrades` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("upgrades_downgrades_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "upgrades_downgrades"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table``, skipping its first column.

    The column list is read from the table itself and the first column is
    dropped from the INSERT (for ``upgrades_downgrades`` that is the
    AUTO_INCREMENT ``id``, which the server fills in). Each row must therefore
    supply one value per remaining column, in table order. The cursor and
    connection are always closed; if the insert fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names;
            # [1:] leaves out the leading primary-key column.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names[1:]
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'firm', 'to_grade', 'from_grade', 'action', 'ticker', 'extraction_timestamp')
INSERT INTO upgrades_downgrades (date, firm, to_grade, from_grade, action, ticker, extraction_timestamp) VALUES (%s, %s, %s, %s, %s, %s, %s)
Añadidas: 3394 filas


In [17]:
# Input for the `holder_location` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("holder_locations.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "holder_location"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table`` of the MySQL ``database``.

    The column list is read from the table itself, and every table column is
    included in the INSERT, so each row must supply one value per column in
    table order. The cursor and connection are always closed; if the insert
    fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('holder', 'latitude', 'longitude', 'headquarters')
INSERT INTO holder_location (holder, latitude, longitude, headquarters) VALUES (%s, %s, %s, %s)
Añadidas: 19 filas


In [19]:
# Input for the `holders` table load.

# No index column is declared here, so every CSV column is kept as data;
# NaN cells become None before the rows are extracted.
data = (
    pd.read_csv("holders_20241028.csv")
    .replace({np.nan: None})
    .values
)
database = "yfinance_stocks"
table = "holders"

def insert_to_table(data, table, database, host = "localhost", user = "root", password = "Hackaboss_2024"):
    """Bulk-insert the rows of ``data`` into ``table``, skipping its first column.

    The column list is read from the table itself and the first column is
    dropped from the INSERT (for ``holders`` that is the AUTO_INCREMENT ``id``,
    which the server fills in). Each row must therefore supply one value per
    remaining column, in table order. The cursor and connection are always
    closed; if the insert fails nothing is committed.

    Args:
        data: Iterable of rows (each row an iterable of column values).
        table: Target table name; interpolated into the SQL as-is, so it must
            be a trusted identifier.
        database: Schema that contains ``table``.
        host: MySQL server host.
        user: MySQL user name.
        password: Password for ``user``.
            NOTE(review): the default is a hard-coded credential.

    Returns:
        None. Prints the column names, the generated query and the row count.
    """
    db = mysql.connector.connect(host     = host,
                                 user     = user,
                                 password = password,
                                 database = database)
    try:
        cursor = db.cursor()
        try:
            # A zero-row SELECT is enough to populate cursor.column_names;
            # [1:] leaves out the leading primary-key column.
            cursor.execute(f"SELECT * FROM {table} LIMIT 0;")
            column_names = cursor.column_names[1:]
            cursor.fetchall()  # consume the (empty) result set before the next statement
            print(column_names)

            placeholders = ", ".join(["%s"] * len(column_names))
            insert_query = f"INSERT INTO {table} ({', '.join(column_names)}) VALUES ({placeholders})"
            values = [tuple(row) for row in data]
            print(insert_query)

            # executemany runs the INSERT once per element of `values`.
            cursor.executemany(insert_query, values)
            db.commit()

            print(f"Añadidas: {cursor.rowcount} filas")
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any partially inserted rows.
        db.close()

# Run the load for the table configured above.
insert_to_table(data=data, table=table, database=database)

('date', 'holder', 'pct_held', 'shares', 'value', 'ticker', 'extraction_timestamp')
INSERT INTO holders (date, holder, pct_held, shares, value, ticker, extraction_timestamp) VALUES (%s, %s, %s, %s, %s, %s, %s)
Añadidas: 50 filas
