In [None]:
import pandas as pd

# Columns carried forward from the raw CSV into the working DataFrame.
columns_to_select = [
    'dateTime',
    'authorId',
    'videoId',
    'desc',
    'hashtagNames',
    'stickersText'
]

try:
    # Load the dataset from the specified path.
    # The two ID columns are read as text rather than left to type inference:
    # inferred as numbers, leading zeros are dropped and a single missing value
    # turns the whole column into floats (123 -> 123.0, and very long IDs lose
    # precision), which changes the text of the ID for any later step that
    # converts it to a string.
    dataset = pd.read_csv(
        '../your_dataset.csv',
        dtype={'authorId': str, 'videoId': str},
    )

    # Select the specified columns from the dataset.
    # .copy() gives an independent DataFrame, so adding columns to it later
    # does not trigger pandas' SettingWithCopyWarning.
    selected_data = dataset[columns_to_select].copy()

    # Display the first few rows of the new DataFrame
    print("Successfully selected the following columns:")
    print(selected_data.head())

except FileNotFoundError:
    # Raised by read_csv when the path does not exist.
    print("Error: The file '../your_dataset.csv' was not found.")
except KeyError as e:
    # Raised by the column selection when a requested column is absent.
    print(f"Error: A specified column was not found in the CSV file: {e}")



In [None]:
import os
from dotenv import load_dotenv

# Load variables from the .env file at the given path into the environment.
load_dotenv(dotenv_path='/your_path/.env')
# Report only whether the salt is available. Printing the value itself would
# write the secret into the notebook's saved output.
print("SALT_KEY loaded:", bool(os.getenv('SALT_KEY')))



In [None]:
import hashlib
import os
from dotenv import load_dotenv

# Populate the environment from a .env file. No path is given here, so
# python-dotenv locates the file itself; pass dotenv_path='/absolute/path/to/.env'
# to point at a specific file instead.
load_dotenv()

# The salt is mandatory: stop immediately when it is unset or empty.
SALT = os.environ.get('SALT_KEY')
if SALT is None or SALT == '':
    raise ValueError("Error: SALT_KEY not found in .env file.")

def hash_with_salt(value_to_hash, salt):
    """
    Return the SHA-256 hex digest of a value with a salt appended.

    The value is converted with ``str()``, the salt string is appended to it,
    and the combined text is UTF-8 encoded before being hashed.
    """
    # Value first, salt second; the order is part of the resulting digest.
    combined_text = ''.join((str(value_to_hash), salt))
    return hashlib.sha256(combined_text.encode('utf-8')).hexdigest()

# Add salted SHA-256 columns for the two ID columns of 'selected_data'.
# The guard keeps this cell from raising when the loading cell did not produce
# the DataFrame; in that case the skip is reported instead of passing silently,
# so a later failure in cells that need the hashed columns is explained.
if 'selected_data' in locals():
    selected_data['hashed_authorId'] = selected_data['authorId'].apply(lambda x: hash_with_salt(x, SALT))
    selected_data['hashed_videoId'] = selected_data['videoId'].apply(lambda x: hash_with_salt(x, SALT))
    # Show raw and hashed IDs side by side for a quick visual check.
    print("\nDataFrame with Hashed IDs:")
    print(selected_data[['authorId', 'hashed_authorId', 'videoId', 'hashed_videoId']].head())
else:
    print("Error: 'selected_data' is not defined; no IDs were hashed. Run the data-loading cell first.")


In [None]:
# Write the raw-ID / hashed-ID pairing to 'Hashed_IDs.csv', without the row index.
selected_data.to_csv(
    'Hashed_IDs.csv',
    columns=['authorId', 'hashed_authorId', 'videoId', 'hashed_videoId'],
    index=False,
)


In [None]:
# Columns written to the exported dataset: the hashed IDs plus the content
# fields. The raw 'authorId' and 'videoId' columns are not in this list.
fields_to_export = [
    'hashed_videoId',
    'hashed_authorId',
    'dateTime',
    'desc',
    'hashtagNames',
    'stickersText',
]

# Write those columns to 'DataSet_Hashed.csv', without the row index.
selected_data.loc[:, fields_to_export].to_csv('DataSet_Hashed.csv', index=False)
