Notebook for uploading rows from a CSV file into an existing Postgres table.

In [1]:
import csv
import psycopg2
from psycopg2 import sql

In [2]:
# PostgreSQL connection URI: user "lizavabistsevits" with an empty password,
# server at localhost:5432, database "taiwan".
conn_string = "postgresql://lizavabistsevits:@localhost:5432/taiwan"

# Station-list CSV; the path is relative, so it resolves against the
# notebook's current working directory.
csv_file_path = "../data/weather_station_list/data/weather_sta_list_ENG.csv"

In [3]:
def print_csv_rows(csv_file):
    """Print every data row of a CSV file as a dict keyed by the header names.

    Parameters
    ----------
    csv_file : str or path-like
        Path of a CSV file whose first line is the header row.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # newline='' hands line endings to the csv module untouched; without it,
    # universal-newline translation rewrites "\r\n" inside quoted fields
    # before the parser ever sees them.
    with open(csv_file, 'r', newline='') as file:
        # DictReader yields one dictionary per data row
        for row in csv.DictReader(file):
            print(row)


In [10]:
# Optional check, left disabled: uncomment to print every row of the CSV.
#print_csv_rows(csv_file_path)

In [5]:
# Open database connection
# (conn and cursor stay at module level: the query and close cells below reuse them.)
conn = psycopg2.connect(conn_string)
cursor = conn.cursor()

In [6]:
# 1. Execute a query
# Quick look at the target table: ask for at most five rows of `stations`.
cursor.execute("SELECT * FROM stations LIMIT 5;")

# 2. Fetch results
# Fetch all rows
# (at most five, because of the LIMIT in the query above)
all_results = cursor.fetchall()
print("All results:", all_results)

All results: []


In [7]:
# Close database connection
# Release the cursor first, then the connection. upload_csv_to_existing_db_table
# below opens its own connection, so this one is not needed any more.
cursor.close()
conn.close()

In [8]:
def upload_csv_to_existing_db_table(csv_file, table_name, conn_string, column_names):
    """Insert selected columns of a CSV file into an existing Postgres table.

    All rows are inserted inside one transaction that is committed only after
    the whole file has been read. If anything fails (missing file, rejected
    INSERT, ...) the exception propagates and the connection is closed without
    committing, so no partial load is left behind.

    Parameters
    ----------
    csv_file : str or path-like
        CSV file whose first line is the header row.
    table_name : str
        Name of the target table; it must already exist.
    conn_string : str
        Connection string handed to ``psycopg2.connect``.
    column_names : dict
        Maps CSV header names to table column names. Entries whose CSV header
        is not present in the file are ignored.

    Notes
    -----
    * If ``column_names`` has a key equal to "code" (case-insensitive) and the
      CSV has that column, only the first row for each code value is inserted;
      later repeats are counted and skipped. This check only looks at the
      current file, not at rows already stored in the table.
    * Empty CSV fields are inserted as NULL.
    * Table and column names are quoted with ``sql.Identifier``; cell values
      are passed as query parameters.
    """
    # Find code column if it exists
    code_column = next((col for col in column_names if col.lower() == "code"), None)

    processed_codes = set()  # codes already inserted from this file
    duplicates = 0
    inserted = 0

    conn = psycopg2.connect(conn_string)
    try:
        # newline='' is what the csv module expects, so that line breaks
        # inside quoted fields reach the parser unmodified.
        with conn.cursor() as cursor, open(csv_file, 'r', newline='') as file:
            csv_reader = csv.DictReader(file)

            # DictReader gives every row the same keys (the header), so the
            # column selection and the INSERT statement are built only once.
            header = csv_reader.fieldnames or []
            mapping = [
                (csv_col, db_col)
                for csv_col, db_col in column_names.items()
                if csv_col in header
            ]
            dedupe = code_column in header

            # Nothing to insert if none of the requested columns is in the file
            if mapping:
                # Use sql.SQL to safely handle table and column names
                insert_query = sql.SQL('INSERT INTO {} ({}) VALUES ({});').format(
                    sql.Identifier(table_name),
                    sql.SQL(', ').join(sql.Identifier(db_col) for _, db_col in mapping),
                    sql.SQL(', ').join([sql.SQL('%s')] * len(mapping)),
                )

                # Insert data row by row
                for row in csv_reader:
                    if dedupe:
                        code_value = row[code_column]
                        # Skip this row if the code has been already processed
                        if code_value in processed_codes:
                            duplicates += 1
                            continue
                        processed_codes.add(code_value)

                    # Convert empty strings (and fields missing from short rows) to NULL
                    values = [
                        None if row[csv_col] in ("", None) else row[csv_col]
                        for csv_col, _ in mapping
                    ]
                    cursor.execute(insert_query, values)
                    inserted += 1

        # Reached only when every row went through
        conn.commit()
    finally:
        # Closing without a prior commit discards the pending inserts
        conn.close()

    print(f"Data from {csv_file} successfully imported to {table_name} table.")
    # Count actual inserts so the figure is also right when no code column is mapped
    print(f"Processed {inserted} unique entries and skipped {duplicates} duplicates.")

In [9]:
# CSV header name -> column name in the `stations` table.
columns = {
    "Code": "code",
    "Name": "name",
    "Original_Name": "orig_name",
    "Type": "type",
    "Latitude": "latitude",
    "Longitude": "longitude",
    "Altitude": "altitude",
    "Data_Start_Date": "data_start_date",
    "Data_End_Date": "data_end_date",
}

# Load the station list into the existing `stations` table.
upload_csv_to_existing_db_table(
    csv_file=csv_file_path,
    table_name="stations",
    conn_string=conn_string,
    column_names=columns,
)

Data from ../data/weather_station_list/data/weather_sta_list_ENG.csv successfully imported to stations table.
Processed 1155 unique entries and skipped 80 duplicates.
