In [None]:
import pandas as pd
from sqlalchemy import create_engine

# Clean the raw users export and write the result to users_formatted.csv.
# Rows that are exact duplicates across every column are discarded up front.
df = pd.read_csv('users.csv').drop_duplicates()

# Normalise the `active` flag to integers. replace() only has an effect when
# the column holds the literal strings 'TRUE'/'FALSE'; values that are already
# boolean or numeric pass through unchanged and are cast by astype(int).
# astype(int) raises if a value cannot be cast to an integer (for example a
# missing value), so bad input fails loudly here rather than reaching the file.
df['active'] = df['active'].replace({'TRUE': 1, 'FALSE': 0}).astype(int)

# Both date columns are interpreted as millisecond Unix-epoch numbers
# (unit='ms') and converted to pandas datetimes.
for date_col in ('createdDate.$date', 'lastLogin.$date'):
    df[date_col] = pd.to_datetime(df[date_col], unit='ms')

# Rename the exported column names to the names used for the MySQL table.
df = df.rename(columns={
    '_id.$oid': 'user_id',
    'createdDate.$date': 'created_date',
    'lastLogin.$date': 'last_login_date',
})

# index=False keeps the pandas row index out of the file. Without it the CSV
# gains an unnamed leading column (with gaps left by drop_duplicates) that
# none of the renamed headers account for.
df.to_csv("users_formatted.csv", index=False)

In [None]:

# Clean the brands export and write the result to brand_formatted_numeric.csv.
# The file is read as UTF-8 first; if its bytes are not valid UTF-8 the read is
# retried as ISO-8859-1.
file_path = 'brands.csv'  # Replace with your actual file path
try:
    df = pd.read_csv(file_path, encoding='utf-8')
except UnicodeDecodeError:
    df = pd.read_csv(file_path, encoding='ISO-8859-1')

# Discard rows that are exact duplicates across every column.
df = df.drop_duplicates()

# Cast barcodes to float in one vectorised step (instead of a per-row lambda).
df['barcode'] = df['barcode'].astype(float)

# Normalise the `topBrand` flag to integers. The literal strings 'TRUE'/'FALSE'
# are mapped to 1/0, missing values are filled with 0, and the column is then
# cast to int. A single cast is enough; repeating it changes nothing.
df['topBrand'] = df['topBrand'].replace({'TRUE': 1, 'FALSE': 0}).fillna(0).astype(int)

# Rename the exported column names to the names used for the MySQL table.
df = df.rename(columns={
    'cpg.$id.$oid': 'cpg_id',
    'cpg.$ref': 'cpg_ref',
})

# index=False keeps the pandas row index out of the file. Without it the CSV
# gains an unnamed leading column (with gaps left by drop_duplicates) that
# none of the renamed headers account for.
df.to_csv("brand_formatted_numeric.csv", index=False)

In [None]:
import pandas as pd
from sqlalchemy import create_engine

# Read the receipts export and discard rows that are exact duplicates across
# every column.
df = pd.read_csv('old_receipts.csv').drop_duplicates()

# bonusPointsEarned: any value that does not parse as a number becomes NaN.
df['bonusPointsEarned'] = pd.to_numeric(df['bonusPointsEarned'], errors='coerce')

# createDate is parsed strictly from millisecond epoch values: there is no
# errors='coerce' here, so an unparseable value raises instead of becoming NaT.
df['createDate.$date'] = pd.to_datetime(df['createDate.$date'], unit='ms')

# The remaining timestamp columns are parsed leniently: millisecond epoch
# values become datetimes and anything unparseable becomes NaT.
lenient_date_columns = [
    'dateScanned.$date',
    'finishedDate.$date',
    'modifyDate.$date',
    'pointsAwardedDate.$date',
    'purchaseDate.$date',
]
for column in lenient_date_columns:
    df[column] = pd.to_datetime(df[column], unit='ms', errors='coerce')

# Write the cleaned frame without the row index and report where it went.
df.to_csv("old_receipts_formatted.csv", index=False)

print("Cleaned data saved to old_receipts_formatted.csv")
