<a href="https://colab.research.google.com/github/thirayume/muangtai/blob/main/Muangtai_PGSQL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Muangtai PostgreSQL Database export query to CSV**

In [2]:
# @title Mount Google Drive (if need)

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# @title Define secrets

from google.colab import userdata
host = userdata.get('host')
port = userdata.get('port')
database = userdata.get('database')
user = userdata.get('user')
password = userdata.get('password')

In [None]:
# @title Install dependencies (if need)

!pip install psycopg2 pandas

In [5]:
# @title Import dependencies

import psycopg2

import numpy as np
import pandas as pd
from pandas import plotting

import matplotlib.pyplot as plt
import seaborn as sns


import plotly as py
import plotly.graph_objs as go
py.offline.init_notebook_mode(connected = True)

import fastai

import warnings
import os
from pathlib import Path
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
from torch.autograd import Variable

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from math import sqrt

# plt.style.use('fivethirtyeight')
plt.style.use("seaborn-whitegrid")
plt.rc("figure", autolayout = True)
plt.rc("axes", labelweight = "bold", labelsize = "large", titleweight = "bold", titlesize = 14, titlepad = 10)

In [6]:
# @title Define data path

data_path = Path('/content/drive/MyDrive/Colab Notebooks/Muangtai')

%cd {data_path}
%ls

/content/drive/MyDrive/Colab Notebooks/Muangtai
'Muangtai - PGSQL.ipynb'


In [7]:
def mape(actual, pred):
    actual, pred = np.array(actual), np.array(pred)
    return np.mean(np.abs((actual - pred) / actual)) * 100

In [15]:
# @title Connect to the PostgreSQL database

conn = psycopg2.connect(
    host=host,
    port=port,
    database=database,
    user=user,
    password=password,
)
conn

<connection object at 0x7a226fdfc040; dsn: 'user=postgres password=xxx dbname=muangtaipanit-hatyai_muangtai-online host=monthong.adiwit.co.th port=5433', closed: 0>

In [None]:
# @title Define a SQL to create a "view_picking_list" (if not found)

sql = """
      DROP VIEW IF EXISTS view_fact_picking_lists;

      CREATE OR REPLACE VIEW view_fact_picking_lists
      AS
      select
                   fact_picking_lists.id as fact_picking_lists__id
                  ,dim_dates."year" || '-' || dim_dates."month" || '-' || dim_dates."date" as fact_picking_lists__date
                  ,to_char((LPAD((dim_times.hour_of_day || ''), 2, '0') || '' || LPAD((dim_times.minute_of_hour || ''), 2, '0') || '00')::time,'HH24:MI:SS') as fact_picking_lists__time
                  ,GREATEST(
                      dim_picking_lists.created_at,
                      dim_picking_lists.updated_at,
                      dim_picking_lists.moderated_at, (
                      dim_picking_lists.document_date + '08:30:01'::time)
                  ) as lasted_updated_at
                  ,dim_picking_lists.document_number as dim_picking_lists__document_number
                  ,dim_picking_lists.id as dim_picking_lists__id
                  ,dim_stock_keeping_units.id as dim_stock_keeping_units__sku_id
                  ,fact_picking_lists.quantity as fact_picking_lists__quantity
                  -- , (fact_picking_lists.quantity  * multiply_hierarchy_unit_quantity(fact_picking_lists.stock_keeping_unit_id)) as liters
                  ,dim_families.is_alcoholic as dim_families__is_alcoholic
                  ,dim_picking_lists.owner_id as dim_picking_lists__owner_id
                  ,dim_picking_lists.owner_type as dim_picking_lists__owner_type
      from		    fact_picking_lists
      inner join	dim_dates on dim_dates.id = fact_picking_lists.date_id
      inner join	dim_times on dim_times.id = fact_picking_lists.time_id
      inner join	dim_families on dim_families.id = fact_picking_lists.family_id
      inner join	dim_brands on dim_brands.id = dim_families.brand_id
      inner join 	dim_picking_lists on dim_picking_lists.id = fact_picking_lists.picking_list_id
      inner join 	dim_stock_keeping_units on dim_stock_keeping_units.id = fact_picking_lists.stock_keeping_unit_id
      where       fact_picking_lists.deleted_at is null
      and         dim_picking_lists.moderation_status = 1 -- Approved
      and         fact_picking_lists.direction = 'outbound'
      order by    fact_picking_lists.id desc
      """

In [45]:
# @title Define a SQL query to select data from a "fact_picking_lists" like in a View

sql = """
      select
                   fact_picking_lists.id as fact_picking_lists__id
                  ,dim_dates."year" || '-' || dim_dates."month" || '-' || dim_dates."date" as fact_picking_lists__date
                  ,to_char((LPAD((dim_times.hour_of_day || ''), 2, '0') || '' || LPAD((dim_times.minute_of_hour || ''), 2, '0') || '00')::time,'HH24:MI:SS') as fact_picking_lists__time
                  ,GREATEST(
                      dim_picking_lists.created_at,
                      dim_picking_lists.updated_at,
                      dim_picking_lists.moderated_at, (
                      dim_picking_lists.document_date + '08:30:01'::time)
                  ) as lasted_updated_at
                  ,dim_picking_lists.document_number as dim_picking_lists__document_number
                  ,dim_picking_lists.id as dim_picking_lists__id
                  ,dim_stock_keeping_units.id as dim_stock_keeping_units__sku_id
                  ,fact_picking_lists.quantity as fact_picking_lists__quantity
                  -- , (fact_picking_lists.quantity  * multiply_hierarchy_unit_quantity(fact_picking_lists.stock_keeping_unit_id)) as liters
                  ,dim_families.is_alcoholic as dim_families__is_alcoholic
                  ,dim_picking_lists.owner_id as dim_picking_lists__owner_id
                  ,dim_picking_lists.owner_type as dim_picking_lists__owner_type
      from		    fact_picking_lists
      inner join	dim_dates on dim_dates.id = fact_picking_lists.date_id
      inner join	dim_times on dim_times.id = fact_picking_lists.time_id
      inner join	dim_families on dim_families.id = fact_picking_lists.family_id
      inner join	dim_brands on dim_brands.id = dim_families.brand_id
      inner join 	dim_picking_lists on dim_picking_lists.id = fact_picking_lists.picking_list_id
      inner join 	dim_stock_keeping_units on dim_stock_keeping_units.id = fact_picking_lists.stock_keeping_unit_id
      where       fact_picking_lists.deleted_at is null
      and         dim_picking_lists.moderation_status = 1 -- Approved
      and         fact_picking_lists.direction = 'outbound'
      order by    fact_picking_lists.id desc
      """

In [46]:
# @title Create a cursor object to execute queries to dataframe

with conn.cursor() as cursor:
  # Execute the query
  cursor.execute(sql)
  columns = [desc[0] for desc in cursor.description]
  # Fetch all rows from the query result
  rows = cursor.fetchall()

  # Create dataframe
  df = pd.DataFrame(rows, columns=columns)

In [47]:
df.head(5)

Unnamed: 0,fact_picking_lists__id,fact_picking_lists__date,fact_picking_lists__time,lasted_updated_at,dim_picking_lists__document_number,dim_picking_lists__id,dim_stock_keeping_units__sku_id,fact_picking_lists__quantity,dim_families__is_alcoholic,dim_picking_lists__owner_id,dim_picking_lists__owner_type
0,9c4ab3df-ff4a-4fc6-879f-29525fab26f1,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,73337333-7333-7333-7333-733373337333,1.0,True,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate
1,9c4ab3df-fc3b-4d63-9fc5-6c4ef371d4df,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,0979879a-c604-441d-afa2-91b8c68e7fc1,1.0,,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate
2,9c4ab3df-f932-4272-a88b-703d9dcd0517,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,64646464-6464-6464-6464-646464646464,1.0,,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate
3,9c4ab3df-f62f-46b0-8120-966184772cc0,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,8cc85ca5-dda3-42d4-b4a3-5d74913a10b2,3.0,True,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate
4,9c4ab3df-f31c-4bef-a42e-b7872168006a,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,9cb8814c-5184-4131-ae09-6691399b7a70,1.0,True,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate


In [52]:
df['fact_picking_lists__podatetime'] = pd.to_datetime( df['fact_picking_lists__date']  + ' ' + df['fact_picking_lists__time'], infer_datetime_format=True)
df['lasted_updated_at'] = pd.to_datetime(df['lasted_updated_at'], infer_datetime_format = True)
df.index =  df['fact_picking_lists__id']

In [53]:
df.head(5)

Unnamed: 0_level_0,fact_picking_lists__id,fact_picking_lists__date,fact_picking_lists__time,lasted_updated_at,dim_picking_lists__document_number,dim_picking_lists__id,dim_stock_keeping_units__sku_id,fact_picking_lists__quantity,dim_families__is_alcoholic,dim_picking_lists__owner_id,dim_picking_lists__owner_type,fact_picking_lists__podatetime
fact_picking_lists__id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
9c4ab3df-ff4a-4fc6-879f-29525fab26f1,9c4ab3df-ff4a-4fc6-879f-29525fab26f1,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,73337333-7333-7333-7333-733373337333,1.0,True,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate,2024-06-15 17:16:00
9c4ab3df-fc3b-4d63-9fc5-6c4ef371d4df,9c4ab3df-fc3b-4d63-9fc5-6c4ef371d4df,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,0979879a-c604-441d-afa2-91b8c68e7fc1,1.0,,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate,2024-06-15 17:16:00
9c4ab3df-f932-4272-a88b-703d9dcd0517,9c4ab3df-f932-4272-a88b-703d9dcd0517,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,64646464-6464-6464-6464-646464646464,1.0,,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate,2024-06-15 17:16:00
9c4ab3df-f62f-46b0-8120-966184772cc0,9c4ab3df-f62f-46b0-8120-966184772cc0,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,8cc85ca5-dda3-42d4-b4a3-5d74913a10b2,3.0,True,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate,2024-06-15 17:16:00
9c4ab3df-f31c-4bef-a42e-b7872168006a,9c4ab3df-f31c-4bef-a42e-b7872168006a,2024-6-15,17:16:00,2024-06-15 17:16:33,78379,9c4ab3df-b951-4052-933f-f9b0c503b07d,9cb8814c-5184-4131-ae09-6691399b7a70,1.0,True,43666aa9-17ca-4b2e-870d-79ef49d6be33,App\Models\Syndicate,2024-06-15 17:16:00


In [54]:
# Close the connection
conn.close()

In [55]:
# @title Save to csv for local work

# Convert the DataFrame to a dictionary with lists as values
data_dict = df.to_dict(orient="list")

# Specify the output file path
output_file = data_path/"output.csv"

# Write the query result to CSV
df.to_csv(output_file)

In [56]:
df.describe()

Unnamed: 0,lasted_updated_at,dim_picking_lists__document_number,fact_picking_lists__podatetime
count,31081,31081.0,31081
mean,2023-11-24 06:25:10.491457792,74971.264663,2023-10-25 16:48:52.554293504
min,2023-04-24 16:59:36,71475.0,2022-04-11 17:04:00
25%,2023-08-04 11:35:30,73238.0,2023-06-24 08:43:00
50%,2023-12-06 15:38:43,74960.0,2023-10-28 16:06:00
75%,2024-02-28 15:14:41,76711.0,2024-02-27 09:30:00
max,2024-06-15 17:16:33,78379.0,2024-06-15 17:16:00
std,,2001.219507,
