In [13]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Establishing connection to MySQL database.
# Connection settings are read from environment variables so the database
# password is never stored in the notebook. The non-secret parts fall back to
# the local development values this notebook was originally written against.
db_user = os.environ.get("DATASPARK_DB_USER", "root")
db_host = os.environ.get("DATASPARK_DB_HOST", "127.0.0.1")
db_name = os.environ.get("DATASPARK_DB_NAME", "DataSpark")

# Prompt interactively when the password is not supplied by the environment.
db_password = os.environ.get("DATASPARK_DB_PASSWORD") or getpass(
    f"MySQL password for {db_user}@{db_host}: "
)

# quote_plus escapes characters such as '@', ':' or '/' that would otherwise
# break URL parsing if they occur in the user name or password.
# NOTE: database_url now embeds the real password - do not print or log it.
database_url = (
    f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}/{db_name}"
)
engine = create_engine(database_url)

In [16]:
# Load every row and column of the sales table into a DataFrame and print it.
query = """
    select * from dataspark_final_df
"""

df = pd.read_sql_query(sql=query, con=engine)
print(df)

       order_number  line_item order_date  customerkey  storekey  productkey  \
0            366000          1 2016-01-01       265598        10        1304   
1           1749017          1 2019-10-15       265598         0        1619   
2            891000          2 2017-06-09       265598         9         174   
3            891000          1 2017-06-09       265598         9         385   
4            891000          4 2017-06-09       265598         9          87   
...             ...        ...        ...          ...       ...         ...   
62879       1511040          1 2019-02-19       957765        40         757   
62880       1466016          1 2019-01-05       347907         9        2232   
62881       1625031          1 2019-06-13       725929        30        2308   
62882       1632012          1 2019-06-20      1292476        65        2331   
62883       1825024          1 2019-12-30      1546902        50        2479   

       quantity currency_code  \
0     

In [18]:
# Ask the information schema which columns the sales table has
query = """
    SELECT COLUMN_NAME
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = 'dataspark_final_df'
      AND TABLE_SCHEMA = 'DataSpark'
"""

# Run the lookup; the result has a single COLUMN_NAME column
columns_df = pd.read_sql_query(sql=query, con=engine)

# Show the column names
print(columns_df)

            COLUMN_NAME
0                   age
1             age_group
2              birthday
3                 brand
4              category
5           categorykey
6                  city
7                 color
8             continent
9               country
10        currency_code
11          customerkey
12       exchange_value
13               gender
14            line_item
15           order_date
16         order_number
17         product_name
18           productkey
19      profit_loss_org
20      profit_loss_usd
21             quantity
22                state
23        store_country
24      store_open_date
25  store_square_meters
26          store_state
27             storekey
28          subcategory
29       subcategorykey
30       total_cost_org
31       total_cost_usd
32      total_price_org
33      total_price_usd
34            unit_cost
35           unit_price
36             zip_code


In [23]:
# Queries to be executed.
# Maps a human-readable report title to the MySQL statement that produces it.
# All statements read from the denormalised `dataspark_final_df` table, which
# holds one row per order line item.
queries = {
    "Total Sales by Year": """
        SELECT YEAR(order_date) AS year, 
               ROUND(SUM(total_price_usd), 2) AS total_sales_usd
        FROM dataspark_final_df
        GROUP BY year
        ORDER BY year;
    """,
    "Monthly Sales Trend for 2019": """
        SELECT YEAR(order_date) AS year, 
               MONTH(order_date) AS month, 
               ROUND(SUM(total_price_usd), 2) AS monthly_sales_usd
        FROM dataspark_final_df
        WHERE YEAR(order_date) = 2019
        GROUP BY year, month
        ORDER BY month;
    """,
    "Top 5 Countries by Total Sales": """
        SELECT store_country, 
        ROUND(SUM(total_price_usd), 2) AS total_sales_usd
        FROM dataspark_final_df
        GROUP BY store_country
        ORDER BY total_sales_usd DESC
        LIMIT 5;
    """,
    "Profit Margin by Year": """
        SELECT YEAR(order_date) AS year,
        ROUND((SUM(total_price_usd) - SUM(total_cost_usd)) / SUM(total_price_usd) * 100, 2) AS profit_margin_percentage
        FROM dataspark_final_df
        GROUP BY year
        ORDER BY year;
    """,
    # Rounded to 2 decimals like every other sales total in this dictionary.
    "Top 5 Products by Total Sales": """
        SELECT product_name, 
               ROUND(SUM(total_price_usd), 2) AS total_sales_usd
        FROM dataspark_final_df
        GROUP BY product_name
        ORDER BY total_sales_usd DESC
        LIMIT 5;
    """,
    # An order spans several line-item rows, so orders must be counted with
    # DISTINCT; a plain COUNT(order_number) would count line items instead.
    "Sales Distribution by Age Group": """
        SELECT age_group, 
               ROUND(SUM(total_price_usd), 2) AS total_sales_usd, 
               COUNT(DISTINCT order_number) AS total_orders
        FROM dataspark_final_df
        GROUP BY age_group
        ORDER BY total_sales_usd DESC;
    """,
    "Total Sales and Profit by Country": """
        SELECT store_country, 
               ROUND(SUM(total_price_usd), 2) AS total_sales_usd, 
               ROUND(SUM(profit_loss_usd), 2) AS total_profit_usd
        FROM dataspark_final_df
        GROUP BY store_country
        ORDER BY total_sales_usd DESC;
    """,
    "Top 5 Brands by Sales Amount": """
        SELECT brand, 
               ROUND(SUM(total_price_usd), 2) AS total_sales_usd
        FROM dataspark_final_df
        GROUP BY brand
        ORDER BY total_sales_usd DESC
        LIMIT 5;
    """,
    # One row per physical store, relating its floor area to its profit.
    # The online store is recorded with store_square_meters = 0, so it is
    # excluded here (which also avoids dividing by zero).
    "Impact of Store Size on Profitability": """
        SELECT storekey,
               store_square_meters,
               ROUND(SUM(total_price_usd), 2) AS total_sales_usd,
               ROUND(SUM(profit_loss_usd), 2) AS total_profit_usd,
               ROUND(SUM(profit_loss_usd) / SUM(total_price_usd) * 100, 2) AS profit_margin_percentage,
               ROUND(SUM(profit_loss_usd) / store_square_meters, 2) AS profit_per_square_meter
        FROM dataspark_final_df
        WHERE store_square_meters > 0
        GROUP BY storekey, store_square_meters
        ORDER BY store_square_meters DESC;
    """,
    # This statement previously ran under the "Impact of Store Size on
    # Profitability" title; it is kept here under a name that matches it.
    "Top 5 Subcategories by Sales": """
        SELECT subcategory,
        ROUND(SUM(total_price_usd), 2) AS total_sales_usd
        FROM dataspark_final_df
        GROUP BY subcategory
        ORDER BY total_sales_usd DESC
        LIMIT 5;
    """,
    "Top Categories by Sales and Profit Margin": """
        SELECT category, 
        ROUND(SUM(total_price_usd), 2) AS total_sales_usd, 
        ROUND((SUM(profit_loss_usd) / SUM(total_price_usd)) * 100, 2) AS profit_margin_percentage
        FROM dataspark_final_df
        GROUP BY category
        ORDER BY total_sales_usd DESC
        LIMIT 5;
    """
}

# Dictionary to store DataFrames, keyed by the report title from `queries`
results = {}

# Executing each query and displaying the result in a DataFrame.
# The loop variables are deliberately not named `df` / `query`: those names
# are bound by earlier cells (`df` holds the full sales table), and rebinding
# them here would make later cells that use `df` show the last aggregate
# instead of the full table when the notebook is run top to bottom.
for query_name, sql in queries.items():
    result_df = pd.read_sql_query(sql, con=engine)
    results[query_name] = result_df
    print(f"\n{query_name}:\n", result_df)



Total Sales by Year:
    year  total_sales_usd
0  2016       6946793.56
1  2017       7421422.27
2  2018      12788960.66
3  2019      18264382.48
4  2020       9294632.14
5  2021       1039288.48

Monthly Sales Trend for 2019:
     year  month  monthly_sales_usd
0   2019      1         1940899.57
1   2019      2         2102862.57
2   2019      3          845925.09
3   2019      4          149892.71
4   2019      5         1594446.47
5   2019      6         1404861.40
6   2019      7         1408714.62
7   2019      8         1500784.77
8   2019      9         1547870.73
9   2019     10         1575168.82
10  2019     11         1715659.88
11  2019     12         2477295.85

Top 5 Countries by Total Sales:
     store_country  total_sales_usd
0   United States      23764425.86
1          Online      11404324.63
2  United Kingdom       5749769.78
3         Germany       4246279.22
4          Canada       3611561.79

Profit Margin by Year:
    year  profit_margin_percentage
0  2016     

In [17]:
# Rich-display the DataFrame currently bound to `df`; the recorded output is
# the full sales table loaded by the `select *` query.
# NOTE(review): this cell's execution count (17) is lower than that of the
# cell placed before it (23), so it was run out of order. It shows the full
# table only if no cell in between rebinds `df` - verify with Restart & Run All.
df

Unnamed: 0,order_number,line_item,order_date,customerkey,storekey,productkey,quantity,currency_code,product_name,brand,...,store_state,store_square_meters,store_open_date,total_cost_usd,total_price_usd,profit_loss_usd,total_cost_org,total_price_org,profit_loss_org,age_group
0,366000,1,2016-01-01,265598,10,1304,1,CAD,Contoso Lens Adapter M450 White,Contoso,...,Nunavut,1210.0,2015-04-04,31.27,68.00,36.73,43.415268,94.411200,50.995932,40-60
1,1749017,1,2019-10-15,265598,0,1619,1,CAD,Contoso DVD Player M130 Grey,Contoso,...,Online,0.0,2010-01-01,27.59,59.99,32.40,36.501570,79.366770,42.865200,40-60
2,891000,2,2017-06-09,265598,9,174,1,CAD,SV 22xDVD M600 Black,Southridge Video,...,Northwest Territories,1500.0,2005-03-04,43.04,129.90,86.86,58.142736,175.481910,117.339174,40-60
3,891000,1,2017-06-09,265598,9,385,2,CAD,Adventure Works Laptop8.9 E0890 Red,Adventure Works,...,Northwest Territories,1500.0,2005-03-04,332.40,652.00,319.60,449.039160,880.786800,431.747640,40-60
4,891000,4,2017-06-09,265598,9,87,1,CAD,NT Wireless Bluetooth Stereo Headphones M402 P...,Northwind Traders,...,Northwest Territories,1500.0,2005-03-04,45.98,99.99,54.01,62.114382,135.076491,72.962109,40-60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62879,1511040,1,2019-02-19,957765,40,757,6,GBP,Contoso Enhanced Capacity Battery M800 Black,Contoso,...,Dungannon and South Tyrone,1300.0,2012-06-06,76.98,167.40,90.42,59.428560,129.232800,69.804240,20-40
62880,1466016,1,2019-01-05,347907,9,2232,2,CAD,WWI Chandelier M815 Black,Wide World Importers,...,Northwest Territories,1500.0,2005-03-04,246.94,537.00,290.06,331.936748,721.835400,389.898652,60-80
62881,1625031,1,2019-06-13,725929,30,2308,2,EUR,Proseware Desk Lamp E0130 Blue,Proseware,...,Pesaro,2100.0,2008-01-12,81.52,159.90,78.38,72.210416,141.639420,69.429004,20-40
62882,1632012,1,2019-06-20,1292476,65,2331,2,USD,Litware Wall Lamp E3015 Silver,Litware,...,West Virginia,1785.0,2012-01-01,104.00,203.98,99.98,104.000000,203.980000,99.980000,20-40
