In [1]:
# Kernel sanity check: prints a fixed greeting and defines no names.
print("Hello world")

Hello world


In [1]:
import numpy as np
import pandas as pd
from faker import Faker
import random
from sqlalchemy import create_engine

# One seed value shared by every random source used in this notebook, so the
# generated values are repeatable when the notebook is run from a fresh kernel.
SEED = 0

fake = Faker()
Faker.seed(SEED)      # Faker's generator
np.random.seed(SEED)  # NumPy legacy global RNG
random.seed(SEED)     # stdlib `random` module

In [2]:
# -----------------------
# 1. AUTH SERVICE (MySQL)
# -----------------------

# Create users: one row per account, with ids running 1..num_users.
num_users = 20000
user_ids = np.arange(1, num_users + 1)

# Forget values recorded by `fake.unique` during an earlier execution of this
# cell, so re-running it does not keep shrinking the pool of available values.
fake.unique.clear()

users_df = pd.DataFrame({
    'id': user_ids,
    # Login names go through `fake.unique`, like the e-mails below: plain
    # `fake.user_name()` can return the same name for different accounts.
    'usuario': [fake.unique.user_name() for _ in range(num_users)],
    'correo': [fake.unique.email() for _ in range(num_users)],
    # NOTE(review): passwords are exported as generated plain text -- confirm
    # whether the auth service expects hashed values on load.
    'contraseña': [fake.password(length=12) for _ in range(num_users)],
})

# Split users into clients and artists.
# Every user id is independently flagged True (artist) or False (client).
is_artist = np.random.choice([True, False], size=num_users)
client_mask = ~is_artist
n_clients = np.sum(client_mask)
n_artists = np.sum(is_artist)

# Clients: first names are all generated before last names.
client_first_names = [fake.first_name() for _ in range(n_clients)]
client_last_names = [fake.last_name() for _ in range(n_clients)]
clients_df = pd.DataFrame({
    'id': user_ids[client_mask],
    'nombre': client_first_names,
    'apellido': client_last_names,
})

# Artists: a generated user name with an "_art" suffix, plus one genre
# picked from a fixed list.
stage_names = [f"{fake.user_name()}_art" for _ in range(n_artists)]
main_genres = np.random.choice(
    ['Rock', 'Pop', 'Jazz', 'Electronic', 'Hip-Hop'],
    size=n_artists,
)
artists_df = pd.DataFrame({
    'id': user_ids[is_artist],
    'nombre_artistico': stage_names,
    'genero_principal': main_genres,
})


In [3]:
# 2. BUSINESS LOGIC (PostgreSQL)
# -----------------------------

# Create songs, each assigned to an artist id drawn from artists_df.
artist_ids = artists_df['id'].values
num_songs = 20000

song_ids = np.arange(1, num_songs + 1)
# Three-word sentence with any trailing period stripped.
song_titles = [fake.sentence(nb_words=3).rstrip('.') for _ in range(num_songs)]
song_genres = np.random.choice(
    ['Rock', 'Pop', 'Jazz', 'Electronic', 'Hip-Hop'],
    size=num_songs,
)
song_artist_ids = np.random.choice(artist_ids, size=num_songs)

songs_df = pd.DataFrame({
    'id': song_ids,
    'titulo': song_titles,
    'genero': song_genres,
    'artista_id': song_artist_ids,
})

# Create posts, each assigned to a client id drawn from clients_df.
client_ids = clients_df['id'].values
num_posts = 20000

post_ids = np.arange(1, num_posts + 1)
post_texts = [fake.paragraph(nb_sentences=3) for _ in range(num_posts)]
post_client_ids = np.random.choice(client_ids, size=num_posts)
# randint's upper bound is exclusive, so likes fall in 0..999.
post_likes = np.random.randint(0, 1000, size=num_posts)

posts_df = pd.DataFrame({
    'id': post_ids,
    'text': post_texts,
    'cliente_id': post_client_ids,
    'likes': post_likes,
})

In [5]:
# ----------------------------
# 3. BUSINESS LOGIC (MongoDB)
# ----------------------------

# Create bookmarks: each row pairs a user id with a post id.
num_bookmarks = 20000

bookmark_ids = np.arange(1, num_bookmarks + 1)
# NOTE(review): user and post are drawn independently with replacement, so the
# same (user, post) pair can occur more than once -- confirm that is acceptable.
bookmark_users = np.random.choice(user_ids, size=num_bookmarks)
bookmark_posts = np.random.choice(posts_df['id'].values, size=num_bookmarks)

bookmarks_df = pd.DataFrame({
    'id': bookmark_ids,
    'user': bookmark_users,
    'post': bookmark_posts,
})

# Create threads ("hilos"): a short text tied to a user id and a post id.
num_hilos = 20000

hilo_ids = np.arange(1, num_hilos + 1)
hilo_texts = [fake.sentence(nb_words=10) for _ in range(num_hilos)]
hilo_users = np.random.choice(user_ids, size=num_hilos)
hilo_posts = np.random.choice(posts_df['id'].values, size=num_hilos)
# randint's upper bound is exclusive, so likes fall in 0..499.
hilo_likes = np.random.randint(0, 500, size=num_hilos)

hilos_df = pd.DataFrame({
    'id': hilo_ids,
    'texto': hilo_texts,
    'user': hilo_users,
    'post': hilo_posts,
    'likes': hilo_likes,
})


In [6]:
# --------------------------
# Export to local files
# --------------------------

# Output file name -> DataFrame, listed in the order the files are written.
# Paths are relative, so the files land in the current working directory.
csv_exports = {
    "auth_users.csv": users_df,
    "auth_clients.csv": clients_df,
    "auth_artists.csv": artists_df,
    "logic_songs.csv": songs_df,
    "logic_posts.csv": posts_df,
    "logic_bookmarks.csv": bookmarks_df,
    "logic_hilos.csv": hilos_df,
}

for csv_name, frame in csv_exports.items():
    # index=False keeps the DataFrame row index out of the CSV.
    frame.to_csv(csv_name, index=False)