In [71]:
import requests
import os
import pandas as pd
import datetime as dt

from dotenv import load_dotenv; load_dotenv()
from bs4 import BeautifulSoup
from mysql.connector import connect, Error, errorcode

In [2]:
url = 'https://learnsql.com/blog/partition-by-with-over-sql/'
res = requests.get(url)
res.status_code

200

In [21]:
soup = BeautifulSoup(res.text, 'html.parser')
tables = soup.find_all('table')

## PARTITION BY

In [31]:
# attract table car_list_prices
car_list_prices_table = tables[0]

# retrieve headers
headers = [cell.text for cell in car_list_prices_table.find_all('th')]

# retrieve rows
rows = car_list_prices_table.find_all('tr')[1:]
data = [
    (
        row.find_all('td')[0].text, # car_make
        row.find_all('td')[1].text, # car_model
        row.find_all('td')[2].text, # car_type
        int(row.find_all('td')[3].text) # car_price
    )
    for row in rows
]

In [67]:
# create table script
CREATE_TABLE = """CREATE TABLE car_list_prices (
    car_make VARCHAR(10)
    ,car_model VARCHAR(10)
    ,car_type VARCHAR(15)
    ,car_price INT
)
"""

In [37]:
config = {
    'host': os.getenv('HOST')
    ,'user': os.getenv('USER_')
    ,'password': os.getenv('PASSWORD')
    ,'database': 'learnsql'
}

In [96]:
with connect(**config) as conn:
    if not conn.is_connected():
        print('unable to connect')

    with conn.cursor() as cur:
        try:
            print('...creating table')
            cur.execute(CREATE_TABLE)
        except Exception as e:
            print('\nError:', e)

        cur.executemany(
            "INSERT INTO car_list_prices VALUES (%s, %s, %s, %s)"
            ,data
        )
        conn.commit()

        print()

        cur.execute('select * from car_list_prices limit 3')
        result = cur.fetchall()
        print('printing result...')
        for x in result: print('\t', x)

    print('\n...connection closed')

...creating table

Error: 1050 (42S01): Table 'car_list_prices' already exists

printing result...
	 ('Ford', 'Mondeo', 'premium', 18200)
	 ('Renault', 'Fuego', 'sport', 16500)
	 ('Citroen', 'Cactus', 'premium', 19000)

...connection closed


In [88]:
%load_ext sql

In [93]:
conn_string = f"mysql+pymysql://{os.getenv('USER_')}:{os.getenv('PASSWORD')}@{os.getenv('HOST')}/learnsql"
%sql $conn_string

In [97]:
%%sql
select * from car_list_prices;

 * mysql+pymysql://tina:***@localhost/learnsql
7 rows affected.


car_make,car_model,car_type,car_price
Ford,Mondeo,premium,18200
Renault,Fuego,sport,16500
Citroen,Cactus,premium,19000
Ford,Falcon,low cost,8990
Ford,Galaxy,standard,12400
Renault,Megane,standard,14300
Citroen,Picasso,premium,23400


obtain the make, the model, the price, the average price

In [104]:
%%sql
select
    car_make
    ,car_model
    ,car_type
    ,car_price
    ,round(avg(car_price) over (partition by car_type), 2) as average_price_per_type
    ,round(avg(car_price) over (partition by car_make), 2) as average_price_per_make
from car_list_prices;

 * mysql+pymysql://tina:***@localhost/learnsql
7 rows affected.


car_make,car_model,car_type,car_price,average_price_per_type,average_price_per_make
Citroen,Cactus,premium,19000,20200.0,21200.0
Citroen,Picasso,premium,23400,20200.0,21200.0
Ford,Falcon,low cost,8990,8990.0,13196.67
Ford,Mondeo,premium,18200,20200.0,13196.67
Ford,Galaxy,standard,12400,13350.0,13196.67
Renault,Fuego,sport,16500,16500.0,15400.0
Renault,Megane,standard,14300,13350.0,15400.0


In [106]:
%sql -x mysql+pymysql://tina:***@localhost/learnsql

## CUMULATIVE - RUNNING TOTAL

In [2]:
res = requests.get('https://learnsql.com/blog/what-is-a-running-total-and-how-to-compute-it-in-sql/')
res.status_code

200

In [3]:
soup = BeautifulSoup(res.text, 'html.parser')
tables = soup.find_all('table')
len(tables)

8

In [58]:
competition = tables[6]
rows = competition.find_all('tr')[1:]

# create df
df = pd.DataFrame(
    {
        'game_id': [int(row.find_all('td')[0].text) for row in rows],
        'game_level': [int(row.find_all('td')[1].text) for row in rows],
        'gamer_id': [int(row.find_all('td')[2].text) for row in rows],
        'competition_date': [dt.datetime.strptime(row.find_all('td')[3].text, ('%Y-%m-%d')) for row in rows],
        'score': [int(row.find_all('td')[4].text) for row in rows]
    }
)

In [59]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   game_id           11 non-null     int64         
 1   game_level        11 non-null     int64         
 2   gamer_id          11 non-null     int64         
 3   competition_date  11 non-null     datetime64[ns]
 4   score             11 non-null     int64         
dtypes: datetime64[ns](1), int64(4)
memory usage: 572.0 bytes


In [61]:
[*df.itertuples(index=False)]

[Pandas(game_id=1, game_level=3, gamer_id=4, competition_date=Timestamp('2020-04-02 00:00:00'), score=4),
 Pandas(game_id=1, game_level=2, gamer_id=4, competition_date=Timestamp('2020-04-03 00:00:00'), score=5),
 Pandas(game_id=1, game_level=1, gamer_id=4, competition_date=Timestamp('2020-04-04 00:00:00'), score=2),
 Pandas(game_id=1, game_level=3, gamer_id=5, competition_date=Timestamp('2020-04-02 00:00:00'), score=1),
 Pandas(game_id=1, game_level=2, gamer_id=5, competition_date=Timestamp('2020-04-03 00:00:00'), score=2),
 Pandas(game_id=2, game_level=3, gamer_id=7, competition_date=Timestamp('2020-04-07 00:00:00'), score=4),
 Pandas(game_id=2, game_level=2, gamer_id=7, competition_date=Timestamp('2020-04-08 00:00:00'), score=6),
 Pandas(game_id=2, game_level=1, gamer_id=7, competition_date=Timestamp('2020-04-07 00:00:00'), score=2),
 Pandas(game_id=2, game_level=3, gamer_id=6, competition_date=Timestamp('2020-04-08 00:00:00'), score=1),
 Pandas(game_id=2, game_level=2, gamer_id=6, c

In [83]:
with connect(**config) as conn:
    if not conn.is_connected():
        print('db is not connected... please check again')

    with conn.cursor() as cur:
        try:
            cur.execute(
                "create table competition"
                " (game_id int, game_level int, gamer_id int, competition_date date, score int);"
            )
        except Error as er:
            if er.errno == errorcode.ER_TABLE_EXISTS_ERROR:
                print('table already exists')
            else: print(er, er.msg)


        cur.executemany(
            "insert into competition values (%s, %s, %s, %s, %s)", [*df.itertuples(index=False)]
        )

        cur.execute('select * from competition;')
        result = cur.fetchall()

        for each in result:
            print(each)

        conn.commit()

table already exists
(1, 3, 4, datetime.date(2020, 4, 2), 4)
(1, 2, 4, datetime.date(2020, 4, 3), 5)
(1, 1, 4, datetime.date(2020, 4, 4), 2)
(1, 3, 5, datetime.date(2020, 4, 2), 1)
(1, 2, 5, datetime.date(2020, 4, 3), 2)
(2, 3, 7, datetime.date(2020, 4, 7), 4)
(2, 2, 7, datetime.date(2020, 4, 8), 6)
(2, 1, 7, datetime.date(2020, 4, 7), 2)
(2, 3, 6, datetime.date(2020, 4, 8), 1)
(2, 2, 6, datetime.date(2020, 4, 9), 1)
(2, 3, 8, datetime.date(2020, 4, 7), 2)
