In [12]:
import pandas as pd
from peewee import (
    Model,
    SqliteDatabase,
    AutoField,
    CharField,
    TextField,
    IntegerField,
    Check
)

In [13]:
# CSV that is loaded into `df` further down.
# Alternative file name left by the author: "Coal company (copy from eve) - Sheet1.csv"
fn = 'Coal company - coal_db_sync.csv'
# CSV that is loaded into `df1` further down.
# NOTE(review): presumably a dump of the `company` table taken on the date in the name - confirm.
fn1 = 'company_202504230053.csv'

In [14]:
# Allowed values for the CHECK constraints declared on the Company model.
# Each tuple is interpolated into the constraint text through an f-string, so
# its Python repr is what ends up in the SQL: keep these as tuples of plain
# strings, with at least two items each (a one-item tuple renders as ('x',)).

# Values accepted for Company.operation_province.
operation_province_constraints = (
    "Aceh", "Bali", "Kepulauan Bangka Belitung", "Banten", "Bengkulu", "Gorontalo",
    "Papua Barat", "Jakarta", "Jambi",
    "Jawa Barat", "Jawa Tengah", "Jawa Timur",
    "Kalimantan Barat", "Kalimantan Selatan", "Kalimantan Tengah",
    "Kalimantan Timur", "Kalimantan Utara",
    "Kepulauan Riau", "Lampung", "Maluku Utara", "Maluku",
    "Nusa Tenggara Barat", "Nusa Tenggara Timur",
    "Papua", "Riau",
    "Sulawesi Barat", "Sulawesi Selatan", "Sulawesi Tengah",
    "Sulawesi Tenggara", "Sulawesi Utara",
    "Sumatera Barat", "Sumatera Selatan", "Sumatera Utara",
    "Yogyakarta",
)

# Values accepted for Company.company_type.
company_type_constraints = (
    "Holding",
    "Mine Owner",
    "Consultant",
    "Logistics",
    "Contractor",
    "Trader",
    "Other Service Provider",
)

# Values accepted for Company.key_operation.
key_operation_constraints = (
    "Mining",
    "Mining Services",
    "Equipment Rental",
    "Logistic Management",
    "Transshipment",
    "Barging & Transshipment",
    "Overburden Removal & Hauling",
    "Dredging",
    "Trading",
    "Coal Trading",
    "Investment",
)

In [15]:
# Database handle for the SQLite file 'coal_db.sqlite'; the Company model
# binds to it through its Meta class.
db = SqliteDatabase('coal_db.sqlite')

In [16]:
class Company(Model):
    """Peewee model for the ``company`` table stored in ``db``.

    The province, company-type and key-operation columns carry SQL CHECK
    constraints built from the module-level ``*_constraints`` tuples, and
    ``activities`` carries a ``json_valid`` CHECK. Every column except
    ``id``, ``name`` and ``key_operation`` is declared nullable.
    """
    id = IntegerField(primary_key=True)
    name = CharField()
    idx_ticker = CharField(null=True)
    # The tuple is interpolated as-is, so its Python repr doubles as the
    # SQL value list of the IN (...) check.
    operation_province = CharField(
        null=True,
        constraints=[
            Check(f"operation_province IN {operation_province_constraints}")
        ]
    )
    operation_kabkot = CharField(null=True)
    representative_address = TextField(null=True)
    company_type = CharField(
        null=True,
        constraints=[
            Check(f"company_type IN {company_type_constraints}")
        ]
    )
    # Not nullable: a company row must always state its key operation.
    key_operation = CharField(
        constraints=[
            Check(f"key_operation IN {key_operation_constraints}")
        ]
    )
    activities = TextField(
        null=True,
        constraints=[Check("json_valid(activities)")]
    )
    website = CharField(null=True)
    # Stored as text, not as an integer: phone numbers in the source data look
    # like '021-7115621177', with a leading zero and a separator that an
    # integer column cannot represent faithfully.
    phone_number = CharField(null=True)
    email = CharField(null=True)

    class Meta:
        database = db
        table_name = 'company'


In [17]:
# Build a query over all Company rows and walk through it without using the rows.
# NOTE(review): the loop body is empty - presumably this is only a smoke test
# that the `company` table can be read through the model; confirm before removing.
companies = Company.select()
for company in companies:
    pass

In [48]:
# Columns taken from the spreadsheet export `fn`, in the order they are paired
# with `df1_ccol` below.
df_ccol = ['Company Name', 'Ticker', 'Head Office Address', 'Website', 'Phone', 'Email', 'Operation Province', 'Company Type', 'Key Operation']
# NOTE(review): alternative header names, presumably from an older sheet layout - confirm.
# df_ccol = ['Ticker', 'Head Office Address', 'Website', 'Phone', 'Email address', 'Operation Location', 'Company Type', 'Key Operation']

# Read 'Phone' as text: left to type inference, an all-digit column would be
# parsed as numbers and lose leading zeros (e.g. the '021' area code).
# Missing phones still come back as NaN.
df = pd.read_csv(fn, dtype={'Phone': str}).loc[:, df_ccol]

# Column names used on the `fn1` side; position i corresponds to df_ccol[i].
df1_ccol = ['name', 'idx_ticker', 'representative_address', 'website', 'phone_number', 'email', 'operation_province', 'company_type', 'key_operation']
df1 = pd.read_csv(fn1)

In [49]:
# One row per 'Company Name', relabelled with the database-side column names.
# GroupBy.first() takes the first non-null value of each column within a
# group, so values from duplicate spreadsheet rows are merged rather than
# copied from a single row.
df_group = (
    df
    .groupby('Company Name')[df_ccol]
    .first()
    .rename(columns=dict(zip(df_ccol, df1_ccol)))
)

# True for the rows of df1 whose name also appears in the spreadsheet.
matching = df1['name'].isin(df_group.index)

In [50]:
# for idx, row in df1.loc[matching, df1_ccol].iterrows():
#     print(row[df1_ccol].to_dict())
#     print(df_group.loc[row['name'], df1_ccol].to_dict(), '\n')

In [51]:
# Spreadsheet rows to insert: the company name is not among the matched df1
# names, and a 'Key Operation' value is present (the model declares that
# column as non-nullable).
add_df = (
    df
    .loc[~df['Company Name'].isin(df1.loc[matching, 'name'])]
    .loc[lambda candidates: candidates['Key Operation'].notna(), :]
)

In [52]:
# Display the candidate rows selected for insertion.
add_df

Unnamed: 0,Company Name,Ticker,Head Office Address,Website,Phone,Email,Operation Province,Company Type,Key Operation
193,PT Gorby Putra Utama,,,,,,Sumatera Selatan,Mine Owner,Mining
194,PT Gorby Global Energi,,,,,,Sumatera Selatan,Mine Owner,Mining
195,PT Cipta Wana Dana,,,,,,Sumatera Selatan,Mine Owner,Mining
196,PT Gorby Energy,,,,,,Sumatera Selatan,Mine Owner,Mining
197,PT Banyan Koalindo Lestari,,,https://banyankoalindolestari.co.id/,021-7115621177,bkl@banyankoalindo.com,Sumatera Selatan,Mine Owner,Mining
198,PT Alhasanie,,,,,,Kalimantan Timur,Mine Owner,Mining
199,PT Borneo Minerals,,,,,,Kalimantan Timur,Mine Owner,Mining
200,PT Diva Kencana Borneo,,,,,,Kalimantan Timur,Mine Owner,Mining
201,PT Karya Borneo Agung,,,,,,Kalimantan Timur,Mine Owner,Mining
202,PT Bara Karya Agung,,,,,,Kalimantan Timur,Mine Owner,Mining


In [53]:
# First row of each 'Company Name' group, in original row order.
# head(1) already returns the original rows with their index labels, so no
# extra .loc lookup is needed (the index comes from read_csv and is unique).
add_df.groupby('Company Name').head(1)

Unnamed: 0,Company Name,Ticker,Head Office Address,Website,Phone,Email,Operation Province,Company Type,Key Operation
193,PT Gorby Putra Utama,,,,,,Sumatera Selatan,Mine Owner,Mining
194,PT Gorby Global Energi,,,,,,Sumatera Selatan,Mine Owner,Mining
195,PT Cipta Wana Dana,,,,,,Sumatera Selatan,Mine Owner,Mining
196,PT Gorby Energy,,,,,,Sumatera Selatan,Mine Owner,Mining
197,PT Banyan Koalindo Lestari,,,https://banyankoalindolestari.co.id/,021-7115621177,bkl@banyankoalindo.com,Sumatera Selatan,Mine Owner,Mining
198,PT Alhasanie,,,,,,Kalimantan Timur,Mine Owner,Mining
199,PT Borneo Minerals,,,,,,Kalimantan Timur,Mine Owner,Mining
200,PT Diva Kencana Borneo,,,,,,Kalimantan Timur,Mine Owner,Mining
201,PT Karya Borneo Agung,,,,,,Kalimantan Timur,Mine Owner,Mining
202,PT Bara Karya Agung,,,,,,Kalimantan Timur,Mine Owner,Mining


In [54]:
# Rows whose 'Company Name' already appeared in an earlier row of add_df;
# the first occurrence of each name is not listed.
add_df.loc[add_df['Company Name'].duplicated(keep='first')]

Unnamed: 0,Company Name,Ticker,Head Office Address,Website,Phone,Email,Operation Province,Company Type,Key Operation
204,PT Hanson Energy,,,,,,Sumatera Selatan,Mine Owner,Mining


In [55]:
def safe_value(val):
    """Return ``val`` unchanged, or ``None`` when ``pd.isna(val)`` is true.

    Used to turn pandas missing-value markers (NaN, None, pd.NA, NaT) into
    ``None`` before the value is handed to the database layer. ``val`` is
    expected to be a scalar.
    """
    if pd.isna(val):
        return None
    return val

# Insert one Company row per distinct 'Company Name' (first row of each group).
#
# The whole batch runs inside a single transaction: if any row is rejected
# (for example by one of the CHECK constraints), the rows inserted before it
# are rolled back too. Without this, a failure part-way through would leave a
# partial insert, and because add_df is derived from the CSV files and not
# from the database, re-running the cell would insert those rows again.
with db.atomic():
    for _, row in add_df.loc[add_df.groupby('Company Name').head(1).index].iterrows():
        Company.create(
            name=safe_value(row['Company Name']),
            idx_ticker=safe_value(row['Ticker']),
            representative_address=safe_value(row['Head Office Address']),
            website=safe_value(row['Website']),
            phone_number=safe_value(row['Phone']),
            email=safe_value(row['Email']),
            operation_province=safe_value(row['Operation Province']),
            company_type=safe_value(row['Company Type']),
            key_operation=safe_value(row['Key Operation'])
        )