In [7]:
# Optional one-time setup: uncomment to install the database drivers.
# NOTE(review): only psycopg2 is imported in the cells visible here; the
# Cloud SQL connector install may be unnecessary — confirm before keeping it.
# !pip install "cloud-sql-python-connector[pg8000]"
# !pip install psycopg2

In [1]:
import os

import pandas as pd
import psycopg2
from psycopg2 import extras

In [24]:
# Connection settings are read from environment variables so that real
# credentials never have to be typed into (and saved with) the notebook.
# Export CFPB_DB_USER / CFPB_DB_PASSWORD / CFPB_DB_NAME / CFPB_DB_HOST before
# starting the kernel; the placeholder defaults are kept only so the cell
# behaves as before when the variables are not set.
user = os.environ.get("CFPB_DB_USER", "xxxxx")
password = os.environ.get("CFPB_DB_PASSWORD", "xxxxx")
database = os.environ.get("CFPB_DB_NAME", "cfpb")
host = os.environ.get("CFPB_DB_HOST", "xx.xx.xx.xx")

# Connect to the PostgreSQL server
conn = psycopg2.connect(
    dbname=database,
    user=user,
    password=password,
    host=host,
)
# Cursor object used by the cells below
cur = conn.cursor()

In [3]:
# Connectivity smoke test: ask the server for its current timestamp.
cur.execute("SELECT NOW()")
result = cur.fetchone()  # one row; its first column is the timestamp
print(f"Current time: {result[0]}")

Current time: 2023-07-05 02:31:06.945207+00:00


In [4]:
# Read the downloaded complaints export into a DataFrame.
cfpb = pd.read_csv(filepath_or_buffer="complaints-2023-07-04_01_06.csv")

In [5]:
# List the column names exactly as they appear in the CSV header.
cfpb.columns

Index(['Date received', 'Product', 'Sub-product', 'Issue', 'Sub-issue',
       'Consumer complaint narrative', 'Company public response', 'Company',
       'State', 'ZIP code', 'Tags', 'Consumer consent provided?',
       'Submitted via', 'Date sent to company', 'Company response to consumer',
       'Timely response?', 'Consumer disputed?', 'Complaint ID'],
      dtype='object')

In [6]:
# Inspect the dtype pandas inferred for each column.
cfpb.dtypes

Date received                    object
Product                          object
Sub-product                      object
Issue                            object
Sub-issue                        object
Consumer complaint narrative     object
Company public response          object
Company                          object
State                            object
ZIP code                         object
Tags                             object
Consumer consent provided?       object
Submitted via                    object
Date sent to company             object
Company response to consumer     object
Timely response?                 object
Consumer disputed?              float64
Complaint ID                      int64
dtype: object

In [16]:
# Keep only complaints that include a narrative, then draw 100 of them.
# random_state pins the draw so a Restart & Run All reproduces the same rows.
cfpb_sample = (
    cfpb[cfpb['Consumer complaint narrative'].notna()]
    .sample(n=100, random_state=42)
)

In [17]:
# Convert 'Date received' and 'Date sent to company' from 'mm/dd/yy' to 'yyyy-mm-dd'.
# NOTE: not idempotent — run once per fresh cfpb_sample, because a second pass
# would meet already-converted strings that no longer match the input format.
for date_col in ('Date received', 'Date sent to company'):
    cfpb_sample[date_col] = (
        pd.to_datetime(cfpb_sample[date_col], format='%m/%d/%y')
        .dt.strftime('%Y-%m-%d')
    )

# Assign the result back instead of calling fillna(inplace=True) on a column
# selection: the chained in-place form is deprecated in pandas 2.x and does not
# update the frame under copy-on-write.
# NOTE(review): the float cast is kept from the original; presumably the later
# insert relies on it — confirm before switching to an integer dtype.
cfpb_sample['Complaint ID'] = cfpb_sample['Complaint ID'].fillna(0).astype(float)

In [18]:
# Confirm the dtypes after the conversion step ('Complaint ID' is cast to float there).
cfpb_sample.dtypes

Date received                    object
Product                          object
Sub-product                      object
Issue                            object
Sub-issue                        object
Consumer complaint narrative     object
Company public response          object
Company                          object
State                            object
ZIP code                         object
Tags                             object
Consumer consent provided?       object
Submitted via                    object
Date sent to company             object
Company response to consumer     object
Timely response?                 object
Consumer disputed?              float64
Complaint ID                    float64
dtype: object

In [19]:
# Map each original CSV header to a SQL-friendly name: spaces and hyphens
# become underscores and a trailing '?' is dropped.
source_columns = (
    'Date received',
    'Product',
    'Sub-product',
    'Issue',
    'Sub-issue',
    'Consumer complaint narrative',
    'Company public response',
    'Company',
    'State',
    'ZIP code',
    'Tags',
    'Consumer consent provided?',
    'Submitted via',
    'Date sent to company',
    'Company response to consumer',
    'Timely response?',
    'Consumer disputed?',
    'Complaint ID',
)
rename_dict = {
    old_name: old_name.replace(' ', '_').replace('-', '_').rstrip('?')
    for old_name in source_columns
}

# Apply the new names to the sample in place, then display it.
cfpb_sample.rename(columns=rename_dict, inplace=True)
cfpb_sample

Unnamed: 0,Date_received,Product,Sub_product,Issue,Sub_issue,Consumer_complaint_narrative,Company_public_response,Company,State,ZIP_code,Tags,Consumer_consent_provided,Submitted_via,Date_sent_to_company,Company_response_to_consumer,Timely_response,Consumer_disputed,Complaint_ID
56007,2023-04-18,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Information belongs to someone else,My name is XXXX XXXX XXXX XXXX this complaint ...,,Experian Information Solutions Inc.,PA,15001,,Consent provided,Web,2023-04-18,Closed with explanation,Yes,,6856028.0
45732,2023-04-11,"Credit reporting, credit repair services, or o...",Credit reporting,Improper use of your report,Reporting company used your report improperly,In accordance with the Fair Credit Reporting a...,Company has responded to the consumer and the ...,Experian Information Solutions Inc.,KS,66030,,Consent provided,Web,2023-04-11,Closed with explanation,Yes,,6826594.0
218786,2023-04-22,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Account information incorrect,XX/XX/2021 XXXX XXXX XXXX XXXX closed my accou...,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",DE,19701,,Consent provided,Web,2023-04-22,Closed with non-monetary relief,Yes,,6875861.0
172834,2023-04-24,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Information belongs to someone else,I want to inform the Credit Bureaus that I was...,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",CA,92881,,Consent provided,Web,2023-04-24,Closed with non-monetary relief,Yes,,6878070.0
248570,2023-05-01,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Their investigation did not fix an error on yo...,This is my numerous request that you amend thi...,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",VA,22405,,Consent provided,Web,2023-05-01,Closed with non-monetary relief,Yes,,6912216.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102728,2023-04-20,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Difficulty submitting a dispute or getting inf...,Though it does credit card business in the mil...,Company has responded to the consumer and the ...,U.S. BANCORP,MI,48168,,Consent provided,Web,2023-04-20,Closed with explanation,Yes,,6860880.0
53843,2023-04-17,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Their investigation did not fix an error on yo...,"When I reviewed my credit report, I discovered...",Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",LA,70122,,Consent provided,Web,2023-04-17,Closed with explanation,Yes,,6846108.0
284290,2023-04-19,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Information belongs to someone else,Please remove these Fraudulent accounts from m...,Company has responded to the consumer and the ...,Experian Information Solutions Inc.,FL,33162,,Consent provided,Web,2023-04-19,Closed with explanation,Yes,,6862517.0
142761,2023-04-22,"Credit reporting, credit repair services, or o...",Credit reporting,Improper use of your report,Reporting company used your report improperly,In accordance with the Fair Credit Reporting a...,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",TX,76137,,Consent provided,Web,2023-04-22,Closed with non-monetary relief,Yes,,6871543.0


In [13]:
# Create the destination table. IF NOT EXISTS makes the cell safe to re-run:
# without it a second execution raises because the table already exists and
# leaves the connection inside a failed transaction.
table_creation_query = """
    CREATE TABLE IF NOT EXISTS cfpb (
        id SERIAL PRIMARY KEY,
        "Date_received" DATE,
        "Product" VARCHAR(255),
        "Sub_product" VARCHAR(255),
        "Issue" VARCHAR(255),
        "Sub_issue" VARCHAR(255),
        "Consumer_complaint_narrative" TEXT,
        "Company_public_response" VARCHAR(255),
        "Company" VARCHAR(255),
        "State" VARCHAR(255),
        "ZIP_code" VARCHAR(255),
        "Tags" VARCHAR(255),
        "Consumer_consent_provided" VARCHAR(255),
        "Submitted_via" VARCHAR(255),
        "Date_sent_to_company" DATE,
        "Company_response_to_consumer" VARCHAR(255),
        "Timely_response" VARCHAR(255),
        "Consumer_disputed" VARCHAR(255),
        "Complaint_ID" INT UNIQUE
    );
"""

try:
    cur.execute(table_creation_query)
    # Commit your changes
    conn.commit()
except Exception:
    # Clear the failed transaction so later cells can keep using the connection.
    conn.rollback()
    raise

In [25]:
# The column list is written once and used both to order the DataFrame and to
# build the INSERT statement, so values always line up with their target columns
# regardless of the DataFrame's own column order.
insert_columns = [
    "Date_received", "Product", "Sub_product", "Issue", "Sub_issue",
    "Consumer_complaint_narrative", "Company_public_response", "Company",
    "State", "ZIP_code", "Tags", "Consumer_consent_provided", "Submitted_via",
    "Date_sent_to_company", "Company_response_to_consumer", "Timely_response",
    "Consumer_disputed", "Complaint_ID",
]

# Convert DataFrame to a list of plain tuples. Casting to object first hands
# psycopg2 ordinary Python values instead of NumPy records, and missing values
# are mapped to None so they are stored as SQL NULL rather than a NaN float.
records = cfpb_sample[insert_columns].astype(object).itertuples(index=False, name=None)
result = [
    tuple(None if pd.isna(value) else value for value in row)
    for row in records
]

quoted_columns = ", ".join(f'"{name}"' for name in insert_columns)
insert_query_base = f"""
INSERT INTO cfpb ({quoted_columns})
VALUES %s ON CONFLICT ("Complaint_ID") DO NOTHING
"""

try:
    psycopg2.extras.execute_values(cur, insert_query_base, result)
    # Commit your changes
    conn.commit()
except Exception:
    # Undo the partial batch and leave the connection usable for later cells.
    conn.rollback()
    raise

In [26]:
query = "SELECT * FROM cfpb"

# Alternative 1 (kept for reference): stream the result in chunks.
# chunk_size = 10000  # adjust to the system's memory and requirements
# chunks = []
# for chunk in pd.read_sql_query(query, conn, chunksize=chunk_size):
#     chunks.append(chunk)  # or process each chunk here
# df = pd.concat(chunks)

# Alternative 2 (kept for reference): go through the cursor directly.
# cur.execute(query)
# rows = cur.fetchall()
# colnames = [desc[0] for desc in cur.description]
# df = pd.DataFrame(rows, columns=colnames)

# Active approach: let pandas run the query and build the DataFrame in one call.
df = pd.read_sql_query(sql=query, con=conn)
df



Unnamed: 0,id,Date_received,Product,Sub_product,Issue,Sub_issue,Consumer_complaint_narrative,Company_public_response,Company,State,ZIP_code,Tags,Consumer_consent_provided,Submitted_via,Date_sent_to_company,Company_response_to_consumer,Timely_response,Consumer_disputed,Complaint_ID
0,1,2023-04-07,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Was not notified of investigation status or re...,,Company has responded to the consumer and the ...,Experian Information Solutions Inc.,IN,46214,,Consent not provided,Web,2023-04-07,Closed with explanation,Yes,,6802950
1,2,2023-05-05,"Payday loan, title loan, or personal loan",Title loan,Problem with the payoff process at the end of ...,,,,"SANTANDER HOLDINGS USA, INC.",NJ,08618,,,Phone,2023-05-09,Closed with non-monetary relief,Yes,,6942452
2,3,2023-04-30,"Credit reporting, credit repair services, or o...",Credit reporting,Improper use of your report,Reporting company used your report improperly,,Company has responded to the consumer and the ...,Experian Information Solutions Inc.,FL,34746,,Consent not provided,Web,2023-04-30,Closed with explanation,Yes,,6909361
3,4,2023-06-15,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Information belongs to someone else,,,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",TX,77065,,,Web,2023-06-15,In progress,Yes,,7122705
4,5,2023-04-06,"Credit reporting, credit repair services, or o...",Credit reporting,Improper use of your report,Credit inquiries on your report that you don't...,,Company has responded to the consumer and the ...,TRUIST FINANCIAL CORPORATION,FL,33129,,Consent not provided,Web,2023-04-06,Closed with explanation,Yes,,6804505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,196,2023-04-20,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Difficulty submitting a dispute or getting inf...,Though it does credit card business in the mil...,Company has responded to the consumer and the ...,U.S. BANCORP,MI,48168,,Consent provided,Web,2023-04-20,Closed with explanation,Yes,,6860880
196,197,2023-04-17,"Credit reporting, credit repair services, or o...",Credit reporting,Problem with a credit reporting company's inve...,Their investigation did not fix an error on yo...,"When I reviewed my credit report, I discovered...",Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",LA,70122,,Consent provided,Web,2023-04-17,Closed with explanation,Yes,,6846108
197,198,2023-04-19,"Credit reporting, credit repair services, or o...",Credit reporting,Incorrect information on your report,Information belongs to someone else,Please remove these Fraudulent accounts from m...,Company has responded to the consumer and the ...,Experian Information Solutions Inc.,FL,33162,,Consent provided,Web,2023-04-19,Closed with explanation,Yes,,6862517
198,199,2023-04-22,"Credit reporting, credit repair services, or o...",Credit reporting,Improper use of your report,Reporting company used your report improperly,In accordance with the Fair Credit Reporting a...,Company has responded to the consumer and the ...,"TRANSUNION INTERMEDIATE HOLDINGS, INC.",TX,76137,,Consent provided,Web,2023-04-22,Closed with non-monetary relief,Yes,,6871543


In [27]:
# Release database resources: the cursor first, then the connection.
for db_resource in (cur, conn):
    db_resource.close()