In [14]:
import pandas as pd
from faker import Faker
import random
from datetime import datetime, timedelta


In [15]:
# Initialize the Faker instance and pin every source of randomness.
#
# Two independent generators feed this notebook: Faker's shared generator
# (used by every `fake.*` call) and the stdlib `random` module (used directly
# for ages, genders, swipe types, ...). Both are seeded so that
# "Restart Kernel -> Run All" regenerates the same dataset.
#
# NOTE(review): the relative date ranges used in later cells ('-2y' ... 'now')
# are presumably resolved against the current clock, so generated timestamps
# will still shift with the run date -- confirm if exact reproducibility of
# dates matters.
SEED = 42

fake = Faker()
Faker.seed(SEED)   # class-level seed for Faker's shared generator
random.seed(SEED)  # stdlib generator used by random.choice / random.randint

In [16]:
# Build the Users table: `num_users` synthetic accounts, one per row.
num_users = 10000

# Columns are filled in one at a time; insertion order is the column order of
# the resulting frame. UserID is a simple 1-based sequential key.
user_data = {}
user_data['UserID'] = list(range(1, num_users + 1))
user_data['Username'] = [fake.user_name() for _ in range(num_users)]
user_data['Password'] = [fake.password() for _ in range(num_users)]
user_data['Email'] = [fake.email() for _ in range(num_users)]
# Account creation timestamps fall within the two years before "now".
user_data['DateCreated'] = [
    fake.date_time_between(start_date='-2y', end_date='now')
    for _ in range(num_users)
]

users_df = pd.DataFrame(data=user_data)

In [17]:
# Build the Profiles table: as many profiles as users, with row i carrying
# ProfileID i+1 alongside the UserID column taken straight from users_df.
num_profiles = num_users

# Fixed vocabularies for the categorical columns.
gender_options = ['Male', 'Female', 'Other']
profession_options = ['Engineer', 'Data Professional', 'Data Analyst', 'Software Developer']

# Reusable iteration range: one pass per generated column.
profile_rows = range(num_profiles)

profile_data = {
    'ProfileID': [profile_id for profile_id in range(1, num_profiles + 1)],
    'UserID': users_df['UserID'],
    'FirstName': [fake.first_name() for _ in profile_rows],
    'LastName': [fake.last_name() for _ in profile_rows],
    # Ages are uniform over 18..65 inclusive.
    'Age': [random.randint(18, 65) for _ in profile_rows],
    'Gender': [random.choice(gender_options) for _ in profile_rows],
    'ProfessionalDetails': [random.choice(profession_options) for _ in profile_rows],
    # Short free-text bio, capped at 100 characters.
    'Bio': [fake.text(max_nb_chars=100) for _ in profile_rows],
    # Activity counters are drawn independently of the Swipes/Connections
    # tables generated elsewhere in this notebook.
    'Barters': [random.randint(0, 100) for _ in profile_rows],
    'LeftSwipes': [random.randint(0, 1000) for _ in profile_rows],
    'RightSwipes': [random.randint(0, 1000) for _ in profile_rows],
}
profiles_df = pd.DataFrame(data=profile_data)

In [18]:
# Build the Locations table: one location row per user, keyed by a 1-based
# LocationID and carrying the UserID column from users_df.
num_locations = num_users

# Country, State, City and Area come from separate, independent Faker calls;
# nothing in this cell ties a city to its state or country.
location_data = dict(
    LocationID=list(range(1, num_locations + 1)),
    UserID=users_df['UserID'],
    Country=[fake.country() for _ in range(num_locations)],
    State=[fake.state() for _ in range(num_locations)],
    City=[fake.city() for _ in range(num_locations)],
    Area=[fake.street_address() for _ in range(num_locations)],
)
locations_df = pd.DataFrame(data=location_data)


In [19]:
# Generate fake interest data: 3 * num_users rows, each assigned to a user
# drawn uniformly at random with replacement (so a given user may end up with
# zero, one or many interests).
num_interests = num_users * 3

# Draw from a plain list instead of the Series. random.choice() indexes its
# argument with an integer, which on a Series is a label lookup: slow when
# repeated tens of thousands of times, and a KeyError as soon as users_df has
# anything other than the default 0..n-1 index.
interest_user_ids = users_df['UserID'].tolist()

interest_data = {
    'InterestID': list(range(1, num_interests + 1)),
    # One bulk draw with replacement replaces num_interests separate calls.
    'UserID': random.choices(interest_user_ids, k=num_interests),
    'Type': [random.choice(['Hobby', 'Like', 'Dislike']) for _ in range(num_interests)],
    'Description': [fake.sentence(nb_words=5) for _ in range(num_interests)]
}
interests_df = pd.DataFrame(interest_data)

In [20]:
# Generate fake connection data: 2 * num_users links between two profiles.
num_connections = num_users * 2

# Work on a plain list: random.* functions index by integer position, which
# on a Series is a (slow, index-dependent) label lookup.
connection_profile_ids = profiles_df['ProfileID'].tolist()

# Draw both endpoints together so a profile is never connected to itself.
# random.sample() picks two different positions, which are different IDs as
# long as ProfileID values are unique. It raises ValueError if fewer than two
# profiles exist, in which case no valid connection is possible anyway.
# The same pair may still be drawn more than once across rows.
connection_pairs = [
    random.sample(connection_profile_ids, 2) for _ in range(num_connections)
]

connection_data = {
    'ConnectionID': list(range(1, num_connections + 1)),
    'ProfileID1': [first_id for first_id, _ in connection_pairs],
    'ProfileID2': [second_id for _, second_id in connection_pairs],
    # Connection timestamps fall within the year before "now".
    'DateConnected': [fake.date_time_between(start_date='-1y', end_date='now') for _ in range(num_connections)]
}
connections_df = pd.DataFrame(connection_data)

In [21]:
# Generate fake swipe data: 50 * num_users swipe events from one profile to
# another.
num_swipes = num_users * 50

# Work on a plain list: drawing from the Series directly performs a pandas
# label lookup per draw, which dominates the runtime at this row count and
# breaks on a non-default index.
swipe_profile_ids = profiles_df['ProfileID'].tolist()

# Draw (from, to) together so a profile never swipes on itself.
# random.sample() picks two different positions, which are different IDs as
# long as ProfileID values are unique; it raises ValueError if fewer than two
# profiles exist.
swipe_pairs = [random.sample(swipe_profile_ids, 2) for _ in range(num_swipes)]

swipe_data = {
    'SwipeID': list(range(1, num_swipes + 1)),
    'FromProfileID': [from_id for from_id, _ in swipe_pairs],
    'ToProfileID': [to_id for _, to_id in swipe_pairs],
    'SwipeType': [random.choice(['Left', 'Right']) for _ in range(num_swipes)],
    # Roughly 30% of swipes carry a colour subtype; the rest are None.
    'SwipeSubtype': [random.choice(['Red', 'Blue', 'Green']) if random.random() < 0.3 else None for _ in range(num_swipes)],
    # Swipe timestamps use Faker's relative range '-6m' .. 'now'.
    'DateSwiped': [fake.date_time_between(start_date='-6m', end_date='now') for _ in range(num_swipes)]
}
swipes_df = pd.DataFrame(swipe_data)

In [22]:
# Preview the first five rows of every generated table.
# Each entry pairs the heading to print with the frame it introduces; every
# heading after the first starts with a newline so tables are separated by a
# blank line.
table_previews = [
    ("Users:", users_df),
    ("\nProfiles:", profiles_df),
    ("\nLocations:", locations_df),
    ("\nInterests:", interests_df),
    ("\nConnections:", connections_df),
    ("\nSwipes:", swipes_df),
]
for heading, frame in table_previews:
    print(heading)
    print(frame.head())

Users:
   UserID      Username    Password                         Email  \
0       1       lberger  b&h7XNiOo@            bhenry@example.net   
1       2    ashleyfrey  ^0%pR7a%b1    gutierreztracy@example.com   
2       3    tammybrown  BK^0xL^y3j           roger56@example.com   
3       4   wfitzgerald  StS2t&Qx^8           lfowler@example.net   
4       5  parkerjoseph  jN9fZHsq)w  jacquelinenorris@example.net   

          DateCreated  
0 2022-12-14 16:21:53  
1 2022-09-03 14:02:16  
2 2022-07-09 06:32:27  
3 2023-12-22 11:12:43  
4 2023-04-07 23:14:02  

Profiles:
   ProfileID  UserID FirstName LastName  Age  Gender ProfessionalDetails  \
0          1       1    Jeremy    Moore   23   Other            Engineer   
1          2       2     Tonya     Moss   57  Female            Engineer   
2          3       3  Kimberly     Wood   26  Female        Data Analyst   
3          4       4     Kevin    Barry   49   Other  Software Developer   
4          5       5    Joshua    Jones   2

In [23]:
import os



In [24]:


# Create the directory that will hold the CSV files.
# exist_ok=True makes this cell idempotent (safe to re-run) and avoids the
# check-then-create race of testing os.path.exists() first.
output_dir = 'fake_data'
os.makedirs(output_dir, exist_ok=True)



In [26]:


# Write each table to its own CSV file inside output_dir.
# The mapping goes from target file name to the frame stored under that name;
# the row index is not written.
csv_exports = {
    'users.csv': users_df,
    'profiles.csv': profiles_df,
    'locations.csv': locations_df,
    'interests.csv': interests_df,
    'connections.csv': connections_df,
    'swipes.csv': swipes_df,
}
for file_name, frame in csv_exports.items():
    csv_path = os.path.join(output_dir, file_name)
    frame.to_csv(csv_path, index=False)


print("Fake data saved in the 'fake_data' directory.")

Fake data saved in the 'fake_data' directory.
