# Create and import customer data into PostgreSQL

In [12]:
import pandas as pd
import numpy as np

### Load data from CSV

In [13]:
# Read the customer CSV export with explicit per-column dtypes.
customer_csv_path = '/mnt/c/Projects/pipelines/data/customer.csv'

# Columns loaded as plain text (wedding_date_07 is kept as text, not parsed as a date).
text_columns = [
    "wedding_date_07", "customer_code", "group_name", "level_name",
    "type_name", "email", "full_name", "gender", "phone", "status",
    "country", "city", "district", "ward", "full_address",
]
# Id columns use pandas' nullable Int64 dtype so missing ids survive the load.
nullable_int_columns = ["customer_id", "group_id", "level_id", "type_id"]
# Columns handed to the date parser.
date_columns = ["created_date_07", "birthday_date_07"]

column_dtypes = {name: str for name in text_columns}
column_dtypes.update({name: 'Int64' for name in nullable_int_columns})
column_dtypes["eligible_for_contact"] = bool

df = pd.read_csv(customer_csv_path,
                 dtype=column_dtypes,
                 parse_dates=date_columns)


In [14]:
# Values that cannot be parsed as dates are coerced to NaT, then NaT is
# replaced by the 1900-01-01 placeholder so the column has no missing values.
birthday_placeholder = pd.to_datetime('1900-01-01')
parsed_birthdays = pd.to_datetime(df['birthday_date_07'], errors="coerce")
df['birthday_date_07'] = parsed_birthdays.fillna(birthday_placeholder)

#### birth_day

#### group_id
- impute missing values with 0 - "No Group"
- convert the column to integer

In [15]:
# group_id: missing values become 0 (the "No Group" placeholder), then the
# column is cast to a plain integer.
no_group_id = 0
group_ids_filled = df['group_id'].fillna(no_group_id)
df['group_id'] = group_ids_filled.astype(int)

In [16]:
# group_name: missing values are labelled "No Group" (the column stays text).
no_group_label = "No Group"
df['group_name'] = df['group_name'].fillna(no_group_label)

#### level_id

In [17]:
# level_id: missing values become 0 (the "No Level" placeholder), then the
# column is cast to a plain integer.
no_level_id = 0
level_ids_filled = df['level_id'].fillna(no_level_id)
df['level_id'] = level_ids_filled.astype(int)

In [18]:
# level_name: missing values get the Vietnamese placeholder label
# ("customer has no membership tier yet"); the column stays text.
no_level_label = "Khách hàng chưa có hạng thành viên"
df['level_name'] = df['level_name'].fillna(no_level_label)

#### type_id

In [19]:
# type_id: missing values become 0 (the "KHÁCH LẺ" placeholder type), then the
# column is cast to a plain integer.
default_type_id = 0
type_ids_filled = df['type_id'].fillna(default_type_id)
df['type_id'] = type_ids_filled.astype(int)

In [20]:
# type_name: missing values are labelled "KHÁCH LẺ" (the column stays text).
default_type_label = "KHÁCH LẺ"
df['type_name'] = df['type_name'].fillna(default_type_label)

#### email

In [21]:
# email: missing values become empty strings.
email_filled = df['email'].fillna("")
df['email'] = email_filled

#### gender

In [22]:
# gender: missing values become empty strings.
# (This cell previously repeated the email fill from the cell above.)
df['gender'] = df['gender'].fillna("")

In [23]:
# Fill the remaining text columns, one placeholder per column:
# address parts fall back to "UNKNOWN", the other two to an empty string.
text_placeholders = {
    'country': "UNKNOWN",
    'city': "UNKNOWN",
    'district': "UNKNOWN",
    'full_address': "UNKNOWN",
    'ward': "UNKNOWN",
    'wedding_date_07': "",
    'gender': "",
}
for column_name, placeholder in text_placeholders.items():
    df[column_name] = df[column_name].fillna(placeholder)

In [24]:
# Count the nulls left in each column to verify the imputation steps above.
missing = df.isna().sum()
missing

customer_id             0
customer_code           0
created_date_07         0
birthday_date_07        0
group_id                0
group_name              0
level_id                0
level_name              0
type_id                 0
type_name               0
email                   0
full_name               0
gender                  0
phone                   0
status                  0
wedding_date_07         0
country                 0
city                    0
district                0
ward                    0
full_address            0
eligible_for_contact    0
dtype: int64

In [25]:
# Show the dtype of every column after cleaning.
df.dtypes

customer_id                      Int64
customer_code                   object
created_date_07         datetime64[ns]
birthday_date_07        datetime64[ns]
group_id                         int64
group_name                      object
level_id                         int64
level_name                      object
type_id                          int64
type_name                       object
email                           object
full_name                       object
gender                          object
phone                           object
status                          object
wedding_date_07                 object
country                         object
city                            object
district                        object
ward                            object
full_address                    object
eligible_for_contact              bool
dtype: object

In [26]:
import pandas as pd
from sqlalchemy import create_engine, Column, Integer, String, Date, Boolean, text, DateTime, ForeignKey, Float, inspect
from sqlalchemy.orm import sessionmaker, declarative_base, relationship
import os


from dotenv import load_dotenv, find_dotenv

from sqlalchemy.engine import URL

# Load connection settings from the nearest .env file; override=True lets the
# .env values replace variables already present in the process environment.
load_dotenv(find_dotenv(), override=True)

# Create a connection to the PostgreSQL database.
# The URL is assembled from components rather than an f-string so that
# credentials containing characters such as '@', ':', '/' or '%' are escaped
# correctly instead of corrupting the connection string.
pg_url = URL.create(
    drivername="postgresql+psycopg2",
    username=os.environ['PG_USER'],
    password=os.environ['PG_PASSWORD'],
    host=os.environ['PG_HOST'],
    port=int(os.environ['PG_PORT']),
    database=os.environ['PG_DB'],
)
engine = create_engine(pg_url)

# Create a base class for declarative class definitions
Base = declarative_base()


In [27]:
# Define the table structure
class Customer(Base):
    """ORM mapping for one row of the ``customers`` table.

    ``customer_id`` is the primary key; every other column is optional
    (``nullable=True``) except ``customer_code`` and ``created_date_07``,
    which simply use the Column defaults.

    NOTE(review): the DataFrame loaded in this notebook also has a
    ``wedding_date_07`` column, but no such column is declared here, so it is
    not written to the database — confirm whether that is intentional.
    """
    __tablename__ = 'customers'

    # customer_id is used directly as the primary key (no surrogate id column).
    customer_id = Column(Integer, primary_key=True)
    customer_code = Column(String)
    # Declared as Date, so only the calendar date is stored.
    created_date_07 = Column(Date)
    birthday_date_07 = Column(Date, nullable=True)
    # NOTE(review): group_id / level_id / type_id are declared as String while
    # the notebook casts those DataFrame columns to int before inserting —
    # confirm the intended column type.
    group_id = Column(String, nullable=True)
    group_name = Column(String, nullable=True)
    level_id = Column(String, nullable=True)
    level_name = Column(String, nullable=True)
    type_id = Column(String, nullable=True)
    type_name = Column(String, nullable=True)
    email = Column(String, nullable=True)
    full_name = Column(String, nullable=True)
    gender = Column(String, nullable=True)
    phone = Column(String, nullable=True)
    status = Column(String, nullable=True)
    country = Column(String, nullable=True)
    city = Column(String, nullable=True)
    district = Column(String, nullable=True)
    ward = Column(String, nullable=True)
    full_address = Column(String, nullable=True)
    eligible_for_contact = Column(Boolean, nullable=True)


In [28]:
# Emit CREATE TABLE for every model registered on Base (here: customers).
Base.metadata.create_all(bind=engine)

# Session factory bound to the engine, plus the session used for the import.
Session = sessionmaker(engine)
session = Session()


In [29]:
# Optional reset (disabled): uncomment the two lines below to drop the
# customers table before re-running the import.
# session.execute(text('DROP TABLE IF EXISTS customers'))
# session.commit()

In [30]:

# Insert data into the database.
#
# One Customer object is staged per DataFrame row and everything is committed
# in a single transaction. Rows whose Customer object cannot be built are
# reported and skipped. A database error at commit time rolls the whole
# transaction back and is re-raised so the cell fails visibly instead of
# printing a success message. The session is closed in every case.

# Copy exactly the columns the model declares, so the insert cannot drift
# from the table definition.
customer_fields = [column.key for column in Customer.__table__.columns]

skipped_rows = 0
try:
    for _, row in df.iterrows():
        try:
            session.add(Customer(**{field: row[field] for field in customer_fields}))
        except Exception as e:
            # Only the row is printed: the Customer object may not exist when
            # construction is what failed.
            skipped_rows += 1
            print(f"Error inserting row: {row}")
            print(e)

    # Commit the changes
    session.commit()
except Exception:
    # Leave the database untouched on failure, then surface the error.
    session.rollback()
    raise
finally:
    session.close()

if skipped_rows:
    print(f"Data import completed with {skipped_rows} row(s) skipped.")
else:
    print("Data import completed successfully!")

Data import completed successfully!
