In [None]:
import urllib
import urllib.parse
import warnings

import pandas as pd
from sqlalchemy import create_engine, Table, MetaData, select, inspect
from sqlalchemy.orm import sessionmaker

# Custom upload with connection string
from engine_info import server_info
# From normalized_tables.py
import normalized_tables

warnings.filterwarnings('ignore')

In [None]:
# Creating a connection to MS SQL SERVER
# `server_info` (presumably a raw ODBC connection string) is URL-encoded so it
# can be embedded in the `odbc_connect` query parameter of the SQLAlchemy URL.
params = urllib.parse.quote_plus(server_info)
engine = create_engine(f'mssql+pyodbc:///?odbc_connect={params}')
# This connection is only closed at the end of the notebook; the table reads
# and the ORM session below are given `engine` directly.
connection = engine.connect()

In [None]:
# Check what is in the database
# `Engine.table_names()` is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names on both old and new versions.
inspect(engine).get_table_names()

In [None]:
# Schema catalogue bound to the engine. It is not referenced again in the
# visible cells of this notebook.
# NOTE(review): the `bind` argument of MetaData is deprecated in SQLAlchemy 1.4
# and removed in 2.0 — confirm the SQLAlchemy version this notebook targets.
metadata = MetaData(bind=engine)

## 1. Commodity (JHB)

In [None]:
# Load the cleaned commodity table into a DataFrame
commodity_df = pd.read_sql_table('Joburg_Fresh_produce_commodity_cleaned', con=engine)

In [None]:
# Preview the first rows
commodity_df.head()

In [None]:
# Check the data type so that it's suited to be inserted in a normalized database
commodity_df.info()

In [None]:
# Cast both quantity-sold columns to 64-bit integers, one column at a time
for qty_col in ('Total_quatity_sold', 'MTD_Total_quatity_sold'):
    commodity_df[qty_col] = commodity_df[qty_col].astype('int64')

In [None]:
# Convert date column from string to datetime
commodity_df['date'] = pd.to_datetime(commodity_df['date'])

In [None]:
# Re-inspect the column dtypes after the integer and datetime conversions
commodity_df.info()

## 2. Container (JHB)

In [None]:
# Load the cleaned container table into a DataFrame
container_df = pd.read_sql_table('Joburg_Fresh_produce_container_cleaned', con=engine)

In [None]:
# Preview the first rows
container_df.head()

In [None]:
# Inspect the column dtypes before converting them
container_df.info()

In [None]:
# Cast both quantity-sold columns to 64-bit integers, one column at a time
for qty_col in ('Total_quatity_sold', 'MTD_Total_quatity_sold'):
    container_df[qty_col] = container_df[qty_col].astype('int64')

In [None]:
# Convert date column from string to datetime
container_df['date'] = pd.to_datetime(container_df['date'])

In [None]:
# Re-inspect the column dtypes after the integer and datetime conversions
container_df.info()

## 3. Combination (JHB)

In [None]:
# Load the cleaned combined table, using its `rowid` column as the DataFrame index
combo_df = pd.read_sql_table('Joburg_Fresh_produce_combined_cleaned', con=engine, index_col='rowid')

In [None]:
# Preview the first rows
combo_df.head()

In [None]:
# Inspect the column dtypes and non-null counts
combo_df.info()

In [None]:
# Number of distinct commodities per container, largest first
combo_df.groupby('container')['commodity'].nunique().sort_values(ascending=False)

In [None]:
# First 10 distinct commodities recorded with the '3KG POCKET' container
combo_df[combo_df['container'] == '3KG POCKET']['commodity'].unique()[:10]

There is a many-to-many relationship between commodities and containers: one commodity can be sold in multiple containers, and one container can hold different commodities.

In [None]:
# Number of distinct containers per product combination, largest first
combo_df.groupby('product_combination')['container'].nunique().sort_values(ascending=False)

In [None]:
# First 10 distinct containers recorded with the '*,*,*,*,*' product combination
combo_df[combo_df['product_combination'] == '*,*,*,*,*']['container'].unique()[:10]

In [None]:
# First 10 distinct commodities recorded with the '*,*,*,*,*' product combination
combo_df[combo_df['product_combination'] == '*,*,*,*,*']['commodity'].unique()[:10]

Similarly, a product combination can be associated with multiple containers as well as multiple commodities.

## 4. Add data to normalized tables

In [None]:
# Session factory bound to the SQL Server engine
Session = sessionmaker(bind=engine)

In [None]:
# Single ORM session used by the insert cells below; closed at the end of the notebook
session = Session()

### 4.1 product

In [None]:
# Different products in the database
# (distinct values of the `commodity` column of the combined table)
products = combo_df['commodity'].unique()

In [None]:
# Add unique values of products to the product sql table.
# All rows are staged first and written with a single commit, so the load is
# one transaction instead of one transaction (and round trip) per product.
session.add_all([normalized_tables.Product(name=item) for item in products])
try:
    session.commit()
except Exception:
    # A failed commit leaves the session unusable until it is rolled back;
    # roll back here so nothing is half-written, then surface the error.
    session.rollback()
    raise

### 4.2 container

In [None]:
# Different containers in the database
# (distinct values of the `container` column of the container table)
# NOTE(review): products and product combinations are collected from combo_df,
# whereas containers come from container_df — confirm this is intentional.
containers = container_df['container'].unique()

In [None]:
# Add unique values of containers to the container sql table.
# All rows are staged first and written with a single commit, so the load is
# one transaction instead of one transaction (and round trip) per container.
session.add_all([normalized_tables.Container(name=package) for package in containers])
try:
    session.commit()
except Exception:
    # A failed commit leaves the session unusable until it is rolled back;
    # roll back here so nothing is half-written, then surface the error.
    session.rollback()
    raise

### 4.3 product_combination

In [None]:
# Different product combinations in the database
# (distinct values of the `product_combination` column of the combined table)
combinations = combo_df['product_combination'].unique()

In [None]:
# Add unique values of product combinations to the product combinations sql table.
# All rows are staged first and written with a single commit, so the load is
# one transaction instead of one transaction (and round trip) per combination.
session.add_all(
    [normalized_tables.ProductCombination(name=combo) for combo in combinations]
)
try:
    session.commit()
except Exception:
    # A failed commit leaves the session unusable until it is rolled back;
    # roll back here so nothing is half-written, then surface the error.
    session.rollback()
    raise

### 4.4 inventory

In [None]:
# date, available, product_id, container_id
# The inventory table will consist of the products not sold for that day
# Re-check the columns of container_df, which is the source of the inventory rows
container_df.info()

In [None]:
# Quick check of row-wise access: print the commodity of the first two rows
for index, x in container_df.head(2).iterrows():
    print(x['commodity'])

In [None]:
# Populate the inventory table from the container data.
#
# Every inventory row has to reference the product and container rows that are
# already stored (sections 4.1 and 4.2). Creating a fresh Product/Container
# object per row would make the ORM insert a new product and a new container
# for each inventory row, so the stored rows are loaded once and reused by name.
# NOTE(review): assumes Product and Container expose the `name` attribute they
# are constructed with — confirm against normalized_tables.py.
product_by_name = {p.name: p for p in session.query(normalized_tables.Product)}
container_by_name = {c.name: c for c in session.query(normalized_tables.Container)}

try:
    for index, row in container_df.iterrows():

        # Reuse the stored product; create one only if this commodity has not
        # been inserted yet (products were collected from the combined table).
        product = product_by_name.get(row['commodity'])
        if product is None:
            product = normalized_tables.Product(name=row['commodity'])
            product_by_name[row['commodity']] = product

        # Same lookup-or-create logic for the container.
        container = container_by_name.get(row['container'])
        if container is None:
            container = normalized_tables.Container(name=row['container'])
            container_by_name[row['container']] = container

        inventory = normalized_tables.Inventory(
            date=row['date'],
            available=row['qty_available'],
            product_inventory=product,
            container_inventory=container,
        )
        session.add(inventory)

    # One commit for the whole load instead of one transaction per row.
    session.commit()
except Exception:
    # Undo the partial load and leave the session usable, then surface the error.
    session.rollback()
    raise

In [None]:
# Roll back the session's current transaction.
# NOTE(review): presumably kept to recover the session after a failed insert in
# the cell above — confirm whether it is still needed.
session.rollback()

### 4.5 sales

In [None]:
# Release the ORM session
session.close()

In [None]:
# Close the connection that was opened right after the engine was created
connection.close()