In [1]:
import os
from io import StringIO

import pandas as pd
import psycopg2

In [2]:
# Connection settings are taken from the standard libpq environment variables
# (PGDATABASE, PGHOST, PGPORT, PGUSER, PGPASSWORD) so real credentials never
# have to be committed with the notebook. The fallbacks are the local
# development placeholders and must not be used for a shared database.
pg_creds = {
    "database": os.environ.get("PGDATABASE", "mydatabase"),
    "host": os.environ.get("PGHOST", "localhost"),
    "port": int(os.environ.get("PGPORT", 5432)),  # env values arrive as strings
    "user": os.environ.get("PGUSER", "myuser"),
    "password": os.environ.get("PGPASSWORD", "mypassword"),
}
conn = psycopg2.connect(**pg_creds)

In [None]:
# Columns to ingest, in the same order as the column list of the COPY
# statement that loads them into the `transactions` table.
columns = [
    "trans_num", "trans_date_trans_time", "cc_num", "merchant", "category", "amt",
    "first", "last", "gender", "street", "city", "state", "zip", "lat", "long", "city_pop",
    "job", "dob", "unix_time", "merch_lat", "merch_long", "merch_is_fraud", "merch_latitude",
    "merch_longitude", "merch_index_right", "merch_zip_code", "merch_po_name",
    "merch_state", "merch_population", "merch_pop_sqmi", "merch_sqmi",
    "merch_shape_length", "merch_shape_area"
]

# Read, select and cast in one chained expression so no column is assigned
# onto a subset of another frame (which can raise SettingWithCopyWarning).
# The integer width is spelled out as int64: a bare `int` can resolve to a
# 32-bit type on some platforms (e.g. Windows with NumPy 1.x), which is too
# small for 16-digit card numbers.
df = (
    pd.read_csv("postgres-import/fraudTestSample.csv")
    .loc[:, columns]
    .astype({"cc_num": "int64", "city_pop": "int64", "unix_time": "int64"})
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 33 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   trans_num              50000 non-null  object 
 1   trans_date_trans_time  50000 non-null  object 
 2   cc_num                 50000 non-null  int64  
 3   merchant               50000 non-null  object 
 4   category               50000 non-null  object 
 5   amt                    50000 non-null  float64
 6   first                  50000 non-null  object 
 7   last                   50000 non-null  object 
 8   gender                 50000 non-null  object 
 9   street                 50000 non-null  object 
 10  city                   50000 non-null  object 
 11  state                  50000 non-null  object 
 12  zip                    50000 non-null  float64
 13  lat                    50000 non-null  float64
 14  long                   50000 non-null  float64
 15  ci

In [4]:
# Preview the first rows to sanity-check the selected columns before ingesting.
df.head()

Unnamed: 0,trans_num,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,merch_longitude,merch_index_right,merch_zip_code,merch_po_name,merch_state,merch_population,merch_pop_sqmi,merch_sqmi,merch_shape_length,merch_shape_area
0,62edd1f19b50171ab778f3998596bd46,16-11-2020 23:06,4657270000000000,fraud_Bahringer-Streich,food_dining,120.29,Alicia,Mitchell,F,125 Kristen Dale,...,-85.108823,10066.0,32321.0,Bristol,FL,6169.0,16.82,366.66,2.314057,0.089038
1,6e95c0e146ed34f37b09ceaf8a805053,23-11-2020 03:54,6011890000000000,fraud_Smitham-Boehm,grocery_net,49.52,Erin,Chavez,F,3379 Williams Common,...,-104.653314,27124.0,80137.0,Watkins,CO,2283.0,19.92,114.58,1.24132,0.031176
2,b6d34dc62367d39c71e4d251fb20c2ff,16-07-2020 12:59,372382000000000,fraud_Schiller Ltd,personal_care,4.46,Brent,Terrell,M,502 Rice Plaza Apt. 979,...,-73.774035,3333.0,12514.0,Clinton Corners,NY,2766.0,107.04,25.84,0.638311,0.007258
3,1b15e86c96d43d090cd1d7c4d23f1845,27-12-2020 17:46,4294040000000000,fraud_Rau-Grant,kids_pets,63.11,Gail,Weaver,F,979 Stewart Lake,...,-81.534687,9205.0,29808.0,Aiken,SC,0.0,0.0,255.05,1.368997,0.063922
4,55534a8def2e2b921364451ace8dad28,18-12-2020 02:20,3504180000000000,fraud_Kris-Weimann,misc_net,8.29,Drew,Garcia,M,70858 Jasmine Meadows Apt. 135,...,-84.871755,14965.0,47371.0,Portland,IN,11782.0,55.06,214.0,1.674166,0.058813


In [5]:
# Serialize the frame to an in-memory CSV (no index, no header row) so it can
# be streamed to PostgreSQL with COPY ... FROM STDIN.
buffer = StringIO()
df.to_csv(buffer, index=False, header=False)
buffer.seek(0)  # rewind so COPY starts reading at the first row

cursor = conn.cursor()

try:
    # IF NOT EXISTS lets the cell run against a table that was already created
    # (e.g. after a TRUNCATE). Re-running against a populated table still fails
    # on the trans_num primary key and is rolled back below.
    # NOTE(review): zip and merch_zip_code are stored as DECIMAL, mirroring the
    # float dtype pandas inferred; leading zeros are not preserved - confirm
    # this is acceptable.
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS transactions (
        trans_num VARCHAR(50) PRIMARY KEY,  -- Use trans_num as primary key instead of a SERIAL id
        trans_date_trans_time TEXT,
        cc_num BIGINT,
        merchant VARCHAR(100),
        category VARCHAR(50),
        amt DECIMAL(10, 2),
        first VARCHAR(50),
        last VARCHAR(50),
        gender CHAR(1),
        street VARCHAR(100),
        city VARCHAR(50),
        state CHAR(2),
        zip DECIMAL(10, 1),
        lat DECIMAL(9, 6),
        long DECIMAL(9, 6),
        city_pop INTEGER,
        job VARCHAR(100),
        dob TEXT,
        unix_time BIGINT,
        merch_lat DECIMAL(9, 6),
        merch_long DECIMAL(9, 6),
        -- Additional columns from CSV
        merch_is_fraud DECIMAL(2, 1),
        merch_latitude DECIMAL(9, 6),
        merch_longitude DECIMAL(9, 6),
        merch_index_right DECIMAL(10, 1),
        merch_zip_code DECIMAL(10, 1),
        merch_po_name VARCHAR(50),
        merch_state CHAR(2),
        merch_population DECIMAL(10, 1),
        merch_pop_sqmi DECIMAL(10, 2),
        merch_sqmi DECIMAL(10, 2),
        merch_shape_length DECIMAL(12, 9),
        merch_shape_area DECIMAL(12, 9)
    );
        """
    )

    # Plain string literal: nothing is interpolated into the statement, so no
    # f-prefix. The column list must stay in the same order as the CSV columns
    # written to `buffer`.
    cursor.copy_expert(
        """
        COPY transactions (
            trans_num, trans_date_trans_time, cc_num, merchant, category, amt,
            first, last, gender, street, city, state, zip, lat, long, city_pop,
            job, dob, unix_time, merch_lat, merch_long, merch_is_fraud, merch_latitude,
            merch_longitude, merch_index_right, merch_zip_code, merch_po_name,
            merch_state, merch_population, merch_pop_sqmi, merch_sqmi,
            merch_shape_length, merch_shape_area
        ) FROM STDIN WITH CSV
        """,
        buffer
    )
    conn.commit()
    print("Data successfully ingested into PostgreSQL.")
except Exception as e:
    # Roll back the open transaction so a failed load leaves nothing
    # half-written, then report the error.
    conn.rollback()
    print(f"Error: {e}")
finally:
    # This is the last use of the connection opened earlier in the notebook.
    cursor.close()
    conn.close()

Data successfully ingested into PostgreSQL.
