In [3]:
import pandas as pd
import pyodbc
import os
from openpyxl import load_workbook
import re
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Set your SQL Server connection details
server = os.getenv('ss_server')
database = os.getenv('ss_database')
username = os.getenv('ss_username')
password = os.getenv('ss_password')
cnxn_str = f'DRIVER={{SQL Server}};SERVER={server};DATABASE={database};UID={username};PWD={password}'
cnxn = pyodbc.connect(cnxn_str)


ProgrammingError: ('42000', '[42000] [Microsoft][ODBC SQL Server Driver][SQL Server]Cannot open database "Wealth" requested by the login. The login failed. (4060) (SQLDriverConnect); [42000] [Microsoft][ODBC SQL Server Driver][SQL Server]Cannot open database "Wealth" requested by the login. The login failed. (4060)')

In [None]:

cursor = cnxn.cursor()

def sanitize_name(name):
    # Remove any character that's not alphanumeric or underscore
    name = re.sub(r'\W+', '_', name)
    # Ensure the name doesn't start with a number
    if name[0].isdigit():
        name = f"t_{name}"
    return name[:128]  # SQL Server has a 128-character limit for identifiers

def create_table_from_df(cursor, df, table_name):
    sanitized_table_name = sanitize_name(table_name)
    sql_create_table = f"IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = '{sanitized_table_name}') BEGIN CREATE TABLE [{sanitized_table_name}] ("
    for column in df.columns:
        sanitized_column = sanitize_name(str(column))
        sql_create_table += f"[{sanitized_column}] NVARCHAR(MAX),"
    sql_create_table = sql_create_table.rstrip(',') + ') END;'
    cursor.execute(sql_create_table)
    cursor.commit()

def load_df_to_sql(cursor, df, table_name):
    sanitized_table_name = sanitize_name(table_name)
    for index, row in df.iterrows():
        placeholders = ', '.join('?' * len(row))
        sql_insert = f"INSERT INTO [{sanitized_table_name}] VALUES ({placeholders})"
        cursor.execute(sql_insert, tuple(str(val) for val in row))
    cursor.commit()

folder_path = r'C:\Users\seanj\Downloads\scotland-blk'

for file_name in os.listdir(folder_path):
    file_path = os.path.join(folder_path, file_name)
    try:
        if file_name.endswith('.csv'):
            df = pd.read_csv(file_path, encoding='ISO-8859-1', low_memory=False)
            table_name = file_name.replace('.csv', '')
            create_table_from_df(cursor, df, table_name)
            load_df_to_sql(cursor, df, table_name)
        elif file_name.endswith('.xlsx'):
            xl = pd.ExcelFile(file_path)
            for sheet_name in xl.sheet_names:
                try:
                    df = xl.parse(sheet_name)
                    table_name = f"{file_name.split('.')[0]}_{sheet_name}"
                    create_table_from_df(cursor, df, table_name)
                    load_df_to_sql(cursor, df, table_name)
                except Exception as e:
                    print(f"An error occurred with sheet {sheet_name} in file {file_name}: {str(e)}")
        elif file_name.endswith('.xls'):
            try:
                xl = pd.ExcelFile(file_path)
            except ImportError:
                print(f"Unable to read {file_name}. Please install xlrd >= 2.0.1 for xls support.")
                continue
            for sheet_name in xl.sheet_names:
                try:
                    df = xl.parse(sheet_name)
                    table_name = f"{file_name.split('.')[0]}_{sheet_name}"
                    create_table_from_df(cursor, df, table_name)
                    load_df_to_sql(cursor, df, table_name)
                except Exception as e:
                    print(f"An error occurred with sheet {sheet_name} in file {file_name}: {str(e)}")
    except Exception as e:
        print(f"An error occurred with file {file_name}: {str(e)}")

cursor.close()
cnxn.close()

In [2]:

# Create the connection string and connect
cnxn_str = f"host={hostname} dbname={database} user={username} password={password} port={port}"
cnxn = psycopg2.connect(cnxn_str)

cursor = cnxn.cursor()

def sanitize_name(name):
    # Remove any character that's not alphanumeric or underscore
    name = re.sub(r'\W+', '_', name)
    # Ensure the name doesn't start with a number
    if name[0].isdigit():
        name = f"t_{name}"
    return name[:128]  # SQL Server has a 128-character limit for identifiers

def create_table_from_df(cursor, df, table_name):
    sanitized_table_name = sanitize_name(table_name)
    sql_create_table = f"IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = '{sanitized_table_name}') BEGIN CREATE TABLE [{sanitized_table_name}] ("
    for column in df.columns:
        sanitized_column = sanitize_name(str(column))
        sql_create_table += f"[{sanitized_column}] NVARCHAR(MAX),"
    sql_create_table = sql_create_table.rstrip(',') + ') END;'
    cursor.execute(sql_create_table)
    cursor.commit()

def load_df_to_sql(cursor, df, table_name):
    sanitized_table_name = sanitize_name(table_name)
    for index, row in df.iterrows():
        placeholders = ', '.join('?' * len(row))
        sql_insert = f"INSERT INTO [{sanitized_table_name}] VALUES ({placeholders})"
        cursor.execute(sql_insert, tuple(str(val) for val in row))
    cursor.commit()

folder_path = r'C:\Users\seanj\Downloads\NRS_Data'

for file_name in os.listdir(folder_path):
    file_path = os.path.join(folder_path, file_name)
    try:
        if file_name.endswith('.csv'):
            df = pd.read_csv(file_path, encoding='ISO-8859-1', low_memory=False)
            table_name = file_name.replace('.csv', '')
            create_table_from_df(cursor, df, table_name)
            load_df_to_sql(cursor, df, table_name)
        elif file_name.endswith('.xlsx'):
            xl = pd.ExcelFile(file_path)
            for sheet_name in xl.sheet_names:
                try:
                    df = xl.parse(sheet_name)
                    table_name = f"{file_name.split('.')[0]}_{sheet_name}"
                    create_table_from_df(cursor, df, table_name)
                    load_df_to_sql(cursor, df, table_name)
                except Exception as e:
                    print(f"An error occurred with sheet {sheet_name} in file {file_name}: {str(e)}")
        elif file_name.endswith('.xls'):
            try:
                xl = pd.ExcelFile(file_path)
            except ImportError:
                print(f"Unable to read {file_name}. Please install xlrd >= 2.0.1 for xls support.")
                continue
            for sheet_name in xl.sheet_names:
                try:
                    df = xl.parse(sheet_name)
                    table_name = f"{file_name.split('.')[0]}_{sheet_name}"
                    create_table_from_df(cursor, df, table_name)
                    load_df_to_sql(cursor, df, table_name)
                except Exception as e:
                    print(f"An error occurred with sheet {sheet_name} in file {file_name}: {str(e)}")
    except Exception as e:
        print(f"An error occurred with file {file_name}: {str(e)}")

cursor.close()
cnxn.close()

In [3]:

for file_name in os.listdir(folder_path):
    file_path = os.path.join(folder_path, file_name)
    try:
        if file_name.endswith('.csv'):
            df = pd.read_csv(file_path, encoding='ISO-8859-1', low_memory=False)
            table_name = file_name.replace('.csv', '')
            create_table_from_df(cursor, df, table_name)
            load_df_to_sql(cursor, df, table_name)
        elif file_name.endswith('.xlsx'):
            xl = pd.ExcelFile(file_path)
            for sheet_name in xl.sheet_names:
                try:
                    df = xl.parse(sheet_name)
                    table_name = f"{file_name.split('.')[0]}_{sheet_name}"
                    create_table_from_df(cursor, df, table_name)
                    load_df_to_sql(cursor, df, table_name)
                except Exception as e:
                    print(f"An error occurred with sheet {sheet_name} in file {file_name}: {str(e)}")
        elif file_name.endswith('.xls'):
            try:
                xl = pd.ExcelFile(file_path)
            except ImportError:
                print(f"Unable to read {file_name}. Please install xlrd >= 2.0.1 for xls support.")
                continue
            for sheet_name in xl.sheet_names:
                try:
                    df = xl.parse(sheet_name)
                    table_name = f"{file_name.split('.')[0]}_{sheet_name}"
                    create_table_from_df(cursor, df, table_name)
                    load_df_to_sql(cursor, df, table_name)
                except Exception as e:
                    print(f"An error occurred with sheet {sheet_name} in file {file_name}: {str(e)}")
    except Exception as e:
        print(f"An error occurred with file {file_name}: {str(e)}")

cursor.close()
cnxn.close()

An error occurred with sheet Boys in file babies-first-names-23-full-lists.xlsx: syntax error at or near "IF"
LINE 1: IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'babies...
        ^

An error occurred with sheet Girls in file babies-first-names-23-full-lists.xlsx: syntax error at or near "IF"
LINE 1: IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'babies...
        ^

An error occurred with file babies-first-names-all-names-all-years.csv: syntax error at or near "IF"
LINE 1: IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'babies...
        ^

An error occurred with sheet ByCouncilSex in file international-migration-scotland.xlsx: syntax error at or near "IF"
LINE 1: IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'intern...
        ^

An error occurred with sheet ByNHSBoardSex in file international-migration-scotland.xlsx: syntax error at or near "IF"
LINE 1: IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'intern...
        ^

An error occurred with she

KeyboardInterrupt: 