## Question 2: CRUD Operations in SQL Server

In [1]:
# Import Python packages
import math
import os
import random
import re
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statistics
from scipy import stats

Create an SQL table schema to store time-series metal prices. Include fields like Date, Metal, Price.

In [5]:
data_file = "../data/MarketData.csv"
try:
    # Read in data set from the csv file, skip initial metadata rows (first 3 rows)
    f = open(data_file, 'rb')
    df = pd.read_csv(f, skiprows = 3, header = [0, 1, 2, 3])

    # Drop the first column name from the multi-index (for "Dates" column)
    df.columns = ['Dates'] + [f"{col[0]} ({col[1]})" for col in df.columns[1:]]
    
except:
    print('Error opening file/loading data')

In [6]:
# Convert "Dates" column to datetime format, European date format
df["Dates"] = pd.to_datetime(df["Dates"], dayfirst=True)

# Display the first few rows
df

Unnamed: 0,Dates,LME COPPER 3MO ($) (LMCADS03 Comdty),LME ALUMINUM 3MO ($) (LMAHDS03 Comdty),LME ZINC 3MO ($) (LMZSDS03 Comdty),LME LEAD 3MO ($) (LMPBDS03 Comdty),LME TIN 3MO ($) (LMSNDS03 Comdty),Generic 1st 'CL' Future (CL1 Comdty)
0,2010-01-01,7375.0,2230.0,2560.0,2432.0,16950,79.36
1,2010-01-04,7500.0,2267.0,2574.0,2515.0,17450,81.51
2,2010-01-05,7485.0,2302.0,2575.0,2522.5,17375,81.77
3,2010-01-06,7660.0,2377.0,2718.0,2680.0,17825,83.18
4,2010-01-07,7535.0,2310.0,2607.0,2599.0,17475,82.66
...,...,...,...,...,...,...,...
3386,2022-12-26,8349.5,2389.5,2965.0,2273.5,23934,79.56
3387,2022-12-27,8349.5,2389.5,2965.0,2273.5,23934,79.53
3388,2022-12-28,8443.0,2381.0,3005.5,2218.0,24734,78.96
3389,2022-12-29,8418.0,2405.0,2984.5,2272.5,24915,78.40


The data has been read in "wide format". According to the instructions, we'd like to save it in "long format" in the SQL database, therefore me need to transform the pandas dataframe to long format


In [8]:
# Use melt() to transform from wide to long format
df_long = df.melt(id_vars=['Dates'], var_name='Metal', value_name='Price')
df_long

Unnamed: 0,Dates,Metal,Price
0,2010-01-01,LME COPPER 3MO ($) (LMCADS03 Comdty),7375.00
1,2010-01-04,LME COPPER 3MO ($) (LMCADS03 Comdty),7500.00
2,2010-01-05,LME COPPER 3MO ($) (LMCADS03 Comdty),7485.00
3,2010-01-06,LME COPPER 3MO ($) (LMCADS03 Comdty),7660.00
4,2010-01-07,LME COPPER 3MO ($) (LMCADS03 Comdty),7535.00
...,...,...,...
20341,2022-12-26,Generic 1st 'CL' Future (CL1 Comdty),79.56
20342,2022-12-27,Generic 1st 'CL' Future (CL1 Comdty),79.53
20343,2022-12-28,Generic 1st 'CL' Future (CL1 Comdty),78.96
20344,2022-12-29,Generic 1st 'CL' Future (CL1 Comdty),78.40


In [20]:
# Import sqlalchemy packages
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import text

Define SQL Table Schema

In [7]:
CREATE TABLE MetalPrices (
    id SERIAL PRIMARY KEY,        -- Auto-incrementing unique identifier
    date DATE NOT NULL,           -- Date of price entry
    metal VARCHAR(50) NOT NULL,   -- Name of metal (e.g., 'Copper', 'Zinc')
    price DECIMAL(10,2) NOT NULL, -- Metal price with two decimal precision

    -- Indexes for performance optimization
    INDEX idx_date (date),  
    INDEX idx_metal (metal)
);

SyntaxError: invalid syntax (3154966016.py, line 1)

Connect to SQL Database Using SQLAlchemy

In [26]:
# Define the database URI
engine = create_engine('sqlite:///MarketData.db', echo=True , connect_args={"check_same_thread": False})

# Create a session
Session = sessionmaker(bind=engine)
session = Session()


In [27]:
# Save pandas dataframe to the SQL database (append mode prevents overwriting existing data)
df_long.to_sql("MetalPrices", con=engine, if_exists="replace", index=False)

print("Data inserted successfully into the database!")

2025-03-31 01:24:44,351 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-31 01:24:44,356 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("MetalPrices")
2025-03-31 01:24:44,357 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 01:24:44,361 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("MetalPrices")
2025-03-31 01:24:44,362 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 01:24:44,365 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-03-31 01:24:44,366 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 01:24:44,369 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2025-03-31 01:24:44,370 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 01:24:44,372 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("MetalPrices")
2025-03-31 01:24:44,373 INFO sqlalchemy.engine.Engine [raw sql] ()


OperationalError: (sqlite3.OperationalError) database is locked
[SQL: 
DROP TABLE "MetalPrices"]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

Perform CRUD Operations in SQLAlchemy. First query data from the database

In [13]:
# Read data from the database
query = "SELECT * FROM MetalPrices"
df_query = pd.read_sql(query, con=engine)

print(df_query)



2025-03-31 00:56:16,045 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-31 00:56:16,048 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("SELECT * FROM MetalPrices")
2025-03-31 00:56:16,049 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 00:56:16,053 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("SELECT * FROM MetalPrices")
2025-03-31 00:56:16,055 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 00:56:16,057 INFO sqlalchemy.engine.Engine SELECT * FROM MetalPrices
2025-03-31 00:56:16,058 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 00:56:16,226 INFO sqlalchemy.engine.Engine ROLLBACK
                            Dates                                    Metal  \
0      2010-01-01 00:00:00.000000  LME COPPER    3MO ($) (LMCADS03 Comdty)   
1      2010-01-04 00:00:00.000000  LME COPPER    3MO ($) (LMCADS03 Comdty)   
2      2010-01-05 00:00:00.000000  LME COPPER    3MO ($) (LMCADS03 Comdty)   
3      2010-01-06 00:00:00.000000  LME COPPER    3MO ($) (LMC

Let's query using the WHERE statement

In [14]:
# Read data from the database
query = "SELECT * FROM MetalPrices WHERE Metal = 'LME COPPER    3MO ($) (LMCADS03 Comdty)' AND Dates BETWEEN '2010-06-14' AND '2010-09-23'"
df_query = pd.read_sql(query, con=engine)

print(df_query)

2025-03-31 01:05:02,628 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-03-31 01:05:02,630 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("SELECT * FROM MetalPrices WHERE Metal = 'LME COPPER    3MO ($) (LMCADS03 Comdty)' AND Dates BETWEEN '2010-06-14' AND '2010-09-23'")
2025-03-31 01:05:02,631 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 01:05:02,633 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("SELECT * FROM MetalPrices WHERE Metal = 'LME COPPER    3MO ($) (LMCADS03 Comdty)' AND Dates BETWEEN '2010-06-14' AND '2010-09-23'")
2025-03-31 01:05:02,635 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 01:05:02,637 INFO sqlalchemy.engine.Engine SELECT * FROM MetalPrices WHERE Metal = 'LME COPPER    3MO ($) (LMCADS03 Comdty)' AND Dates BETWEEN '2010-06-14' AND '2010-09-23'
2025-03-31 01:05:02,641 INFO sqlalchemy.engine.Engine [raw sql] ()
2025-03-31 01:05:02,649 INFO sqlalchemy.engine.Engine ROLLBACK
                         Dates                               

Create (Insert New Data)

In [16]:
# Insert a new metal price record
session.execute(text("""
    INSERT INTO MetalPrices (Dates, Metal, Price)
    VALUES ('2023-01-06', 'LME COPPER    3MO ($) (LMCADS03 Comdty)', 8589.50)
"""))

session.commit()
print("New data inserted successfully!")

InvalidRequestError: This session is in 'prepared' state; no further SQL can be emitted within this transaction.

Update Data

In [17]:
# Update price for a specific metal and date
session.execute(text("""
    UPDATE MetalPrices
    SET Price = 8600.00
    WHERE metal = 'LME COPPER    3MO ($) (LMCADS03 Comdty)' AND Dates = '2022-01-02'
"""))

session.commit()
print("Price updated successfully!")

InvalidRequestError: This session is in 'prepared' state; no further SQL can be emitted within this transaction.

Delete Data

In [18]:
# Delete a specific record
session.execute(text("""
    DELETE FROM MetalPrices
    WHERE Metal = 'LME ZINC      3MO ($) (LMZSDS03 Comdty)' AND Dates = '2010-01-28'
"""))

session.commit()
print("Record deleted successfully!")

InvalidRequestError: This session is in 'prepared' state; no further SQL can be emitted within this transaction.

Closing the Database Connection

In [25]:
session.close()
engine.dispose()
print("Database connection closed.")

Database connection closed.
