In [1]:
import psycopg2
import random
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text
from faker import Faker

### Set up a connection to the PostgreSQL database

In [2]:
f = open("credentials\credentials_railway_hospitable-direction.txt")
lines=f.readlines()
host=lines[0][7:-1].strip()
database=lines[1][11:].strip()
user=lines[2][7:].strip()
password=lines[3][11:].strip()
port=lines[4][7:].strip()
url=lines[5][6:].strip()
f.close()

In [3]:
print("host:", host)
print("database:", database)
print("user:", user)
print("password:", password)
print("port:", port)
print("url:", url)

host: containers-us-west-163.railway.app
database: railway
user: postgres
password: fG3tEK5mqcxtUnHhUoc6
port: 6455
url: postgresql://postgres:fG3tEK5mqcxtUnHhUoc6@containers-us-west-163.railway.app:6455/railway


In [4]:
def update_db(query):
    conn = psycopg2.connect(
    host=host,
    database=database,
    user=user,
    password=password,
    port=port
    )

    cur = conn.cursor()

    cur.execute(query)

    conn.commit()
    cur.close()
    conn.close()

    return "Database has been updated"

In [5]:
def check_db(query):
    engine = create_engine(url)
    
    return pd.read_sql_query(text(query), con=engine.connect())

### Create "categories" table

In [8]:
# Create table
query = """
    CREATE TABLE categories(
        category_id SERIAL PRIMARY KEY,
        name VARCHAR(50) NOT NULL
    )
"""
update_db(query)

# Insert data
query = """
    INSERT INTO categories (name)
    VALUES
        ('Finance and Investment'),
        ('Management Skills'),
        ('Technology'),
        ('Business Management'),
        ('Marketing')
"""
update_db(query)

'Database has been updated'

### Check "categories" table

In [10]:
query = "SELECT * FROM categories"
check_db(query)

Unnamed: 0,category_id,name
0,1,Finance and Investment
1,2,Management Skills
2,3,Technology
3,4,Business Management
4,5,Marketing


### Create "events" table

In [13]:
# Create table
query = """
    CREATE TABLE events(
        event_id SERIAL PRIMARY KEY,
        title VARCHAR(100) NOT NULL,
        description TEXT NOT NULL,
        date TIMESTAMP NOT NULL,
        category_id INTEGER NOT NULL,
        CONSTRAINT fk_categories FOREIGN KEY (category_id) REFERENCES categories(category_id)
    )
"""
update_db(query)

# Insert data
query = """
    INSERT INTO events (title, description, date, category_id)
    VALUES
        ('HR INTERNATIONAL SUMMER SCHOOL', 'empty', '2023-06-22 00:00:00', '2'),
        ('OPEN DAY BOOTCAMPS EDEM POWERED BY THE BRIDGE', 'empty', '2023-07-04 19:00:00', '3'),
        ('ENCUENTRO CON ERIC MASKIN, PREMIO NOBEL DE ECONOMÍA 2007', 'empty', '2023-06-05 11:30:00', '1')
"""
update_db(query)

'Database has been updated'

### Check "events" table

In [14]:
query = "SELECT * FROM events"
check_db(query)

Unnamed: 0,event_id,title,description,date,category_id
0,1,HR INTERNATIONAL SUMMER SCHOOL,empty,2023-06-22 00:00:00,2
1,2,OPEN DAY BOOTCAMPS EDEM POWERED BY THE BRIDGE,empty,2023-07-04 19:00:00,3
2,3,"ENCUENTRO CON ERIC MASKIN, PREMIO NOBEL DE ECO...",empty,2023-06-05 11:30:00,1


### Create "users" table

In [None]:
# # Create table
# query = """
#     CREATE TABLE users (
#         user_id SERIAL PRIMARY KEY,
#         name VARCHAR(50) NOT NULL,
#         surname VARCHAR(50) NOT NULL,
#         role_mde name VARCHAR(50) NOT NULL,
#         programme name VARCHAR(50) NOT NULL,
#         year INTEGER NOT NULL,
#         connections TEXT NOT NULL,
#         category_id INTEGER NOT NULL,
#         CONSTRAINT fk_categories FOREIGN KEY (category_id) REFERENCES categories(category_id)
#     )
# """
# update_db(query)

### Create "users_df" dataframe

Column "gender"

In [15]:
# Set the seed value
seed_value = 42

# Set the seed for random module
random.seed(seed_value)

male_count = 481
female_count = 253

genders = []
for _ in range(male_count):
    genders.append("male")

for _ in range(female_count):
    genders.append("female")

random.shuffle(genders)

users_df = pd.DataFrame(genders, columns=["gender"])
users_df

Unnamed: 0,gender
0,female
1,female
2,female
3,male
4,male
...,...
729,male
730,male
731,male
732,male


Column "name"

In [16]:
fake = Faker('es_ES')

male_names = []
female_names = []

for gen in users_df["gender"]:
    if gen == "male":
        male_names.append(fake.name_male())
    elif gen == "female":
        female_names.append(fake.name_female())

users_df.loc[users_df["gender"] == "male", "name"] = male_names
users_df.loc[users_df["gender"] == "female", "name"] = female_names

users_df

Unnamed: 0,gender,name
0,female,Maricruz Montserrat Cantón
1,female,Esther Gisbert Saldaña
2,female,Soraya Castañeda Barriga
3,male,Juan Bautista Blazquez Valderrama
4,male,Adalberto Coloma-Nebot
...,...,...
729,male,Juan Antonio Hoz Pereira
730,male,Jordi Vicente
731,male,Augusto Martin-Cabrero
732,male,Isaac Aramburu Conesa


Columns "programme" and "year"

In [17]:
prog_year = {
    ('BSc in Engineering and Management', 1): 65, ('BSc in Engineering and Management', 2): 42, ('BSc in Engineering and Management', 3): 28, ('BSc in Engineering and Management', 4): 29,
    ('BBA in Business Administration', 1): 128, ('BBA in Business Administration', 2): 92, ('BBA in Business Administration', 3): 83, ('BBA in Business Administration', 4): 72,
    ('Master Marketing and Digital Sales', 1): 61, ('Master Marketing and Digital Sales', 2): 0, ('Master Marketing and Digital Sales', 3): 0, ('Master Marketing and Digital Sales', 4): 0,
    ('Master Data Analytics', 1): 30, ('Master Data Analytics', 2): 0, ('Master Data Analytics', 3): 0, ('Master Data Analytics', 4): 0,
    ('Master Finance', 1): 19, ('Master Finance', 2): 0, ('Master Finance', 3): 0, ('Master Finance', 4): 0,
    ('MBA Junior', 1): 41, ('MBA Junior', 2): 0, ('MBA Junior', 3): 0, ('MBA Junior', 4): 0,
    ('Bootcamp Data Science', 1): 17, ('Bootcamp Data Science', 2): 0, ('Bootcamp Data Science', 3): 0, ('Bootcamp Data Science', 4): 0,
    ('Bootcamp Full Stack', 1): 14, ('Bootcamp Full Stack', 2): 0, ('Bootcamp Full Stack', 3): 0, ('Bootcamp Full Stack', 4): 0,
    ('Bootcamp UX/ UI', 1): 7, ('Bootcamp UX/ UI', 2): 0, ('Bootcamp UX/ UI', 3): 0, ('Bootcamp UX/ UI', 4): 0,
    ('Bootcamp Cibersecurity', 1): 6, ('Bootcamp Cibersecurity', 2): 0, ('Bootcamp Cibersecurity', 3): 0, ('Bootcamp Cibersecurity', 4): 0,
}

# Set the seed for random module
random.seed(seed_value)

users_df["prog_year"] = users_df.apply(
    lambda row: random.choices(list(prog_year.keys()), weights=list(prog_year.values()))[0],
    axis=1
)

# Create the "programme" column
users_df["programme"] = users_df["prog_year"].apply(lambda row: row[0])

# Create the "year_of_study" column
users_df["year"] = users_df["prog_year"].apply(lambda row: row[1])

users_df

Unnamed: 0,gender,name,prog_year,programme,year
0,female,Maricruz Montserrat Cantón,"(BBA in Business Administration, 4)",BBA in Business Administration,4
1,female,Esther Gisbert Saldaña,"(BSc in Engineering and Management, 1)",BSc in Engineering and Management,1
2,female,Soraya Castañeda Barriga,"(BBA in Business Administration, 1)",BBA in Business Administration,1
3,male,Juan Bautista Blazquez Valderrama,"(BSc in Engineering and Management, 4)",BSc in Engineering and Management,4
4,male,Adalberto Coloma-Nebot,"(Master Marketing and Digital Sales, 1)",Master Marketing and Digital Sales,1
...,...,...,...,...,...
729,male,Juan Antonio Hoz Pereira,"(Master Marketing and Digital Sales, 1)",Master Marketing and Digital Sales,1
730,male,Jordi Vicente,"(BBA in Business Administration, 2)",BBA in Business Administration,2
731,male,Augusto Martin-Cabrero,"(BBA in Business Administration, 4)",BBA in Business Administration,4
732,male,Isaac Aramburu Conesa,"(MBA Junior, 1)",MBA Junior,1


In [18]:
users_df.groupby("programme")["name"].count()

programme
BBA in Business Administration        373
BSc in Engineering and Management     167
Bootcamp Cibersecurity                 10
Bootcamp Data Science                  20
Bootcamp Full Stack                    12
Bootcamp UX/ UI                         7
MBA Junior                             31
Master Data Analytics                  29
Master Finance                         27
Master Marketing and Digital Sales     58
Name: name, dtype: int64

Column "email" (school_email)

In [19]:
# Generate school_email based on student_name
users_df["email"] = users_df["name"].apply(lambda name: name.lower().replace(" ", "") + "@edem.es")

users_df

Unnamed: 0,gender,name,prog_year,programme,year,email
0,female,Maricruz Montserrat Cantón,"(BBA in Business Administration, 4)",BBA in Business Administration,4,maricruzmontserratcantón@edem.es
1,female,Esther Gisbert Saldaña,"(BSc in Engineering and Management, 1)",BSc in Engineering and Management,1,esthergisbertsaldaña@edem.es
2,female,Soraya Castañeda Barriga,"(BBA in Business Administration, 1)",BBA in Business Administration,1,sorayacastañedabarriga@edem.es
3,male,Juan Bautista Blazquez Valderrama,"(BSc in Engineering and Management, 4)",BSc in Engineering and Management,4,juanbautistablazquezvalderrama@edem.es
4,male,Adalberto Coloma-Nebot,"(Master Marketing and Digital Sales, 1)",Master Marketing and Digital Sales,1,adalbertocoloma-nebot@edem.es
...,...,...,...,...,...,...
729,male,Juan Antonio Hoz Pereira,"(Master Marketing and Digital Sales, 1)",Master Marketing and Digital Sales,1,juanantoniohozpereira@edem.es
730,male,Jordi Vicente,"(BBA in Business Administration, 2)",BBA in Business Administration,2,jordivicente@edem.es
731,male,Augusto Martin-Cabrero,"(BBA in Business Administration, 4)",BBA in Business Administration,4,augustomartin-cabrero@edem.es
732,male,Isaac Aramburu Conesa,"(MBA Junior, 1)",MBA Junior,1,isaacaramburuconesa@edem.es


Column "age"

In [21]:
# Set the seed for the random module
random.seed(seed_value)

age_range_1 = [18, 19, 20, 21, 22]
age_range_2 = [23, 24, 25, 26, 27]
age_range_3 = [x for x in range(20, 51)]

mask_1_2 = users_df["programme"].isin(['BSc in Engineering and Management', 'BBA in Business Administration'])
mask_3_to_7 = users_df["programme"].isin(['Master Marketing and Digital Sales', 'Master Data Analytics', 'Master Finance', 'MBA Junior'])
mask_8_to_13 = users_df["programme"].isin(['Bootcamp Data Science', 'Bootcamp Full Stack', 'Bootcamp UX/ UI', 'Bootcamp Cibersecurity'])

users_df.loc[mask_1_2, "age"] = random.choices(age_range_1, weights=[5, 3, 1, 1, 0], k=mask_1_2.sum())
users_df.loc[mask_3_to_7, "age"] = random.choices(age_range_2, weights=[2, 2, 2, 2, 2], k=mask_3_to_7.sum())

weights_3_to_13 = [1 / (x - 19) for x in range(20, 51)]
users_df.loc[mask_8_to_13, "age"] = random.choices(age_range_3, weights=weights_3_to_13, k=mask_8_to_13.sum())


users_df

Unnamed: 0,gender,name,prog_year,programme,year,email,age
0,female,Maricruz Montserrat Cantón,"(BBA in Business Administration, 4)",BBA in Business Administration,4,maricruzmontserratcantón@edem.es,19.0
1,female,Esther Gisbert Saldaña,"(BSc in Engineering and Management, 1)",BSc in Engineering and Management,1,esthergisbertsaldaña@edem.es,18.0
2,female,Soraya Castañeda Barriga,"(BBA in Business Administration, 1)",BBA in Business Administration,1,sorayacastañedabarriga@edem.es,18.0
3,male,Juan Bautista Blazquez Valderrama,"(BSc in Engineering and Management, 4)",BSc in Engineering and Management,4,juanbautistablazquezvalderrama@edem.es,18.0
4,male,Adalberto Coloma-Nebot,"(Master Marketing and Digital Sales, 1)",Master Marketing and Digital Sales,1,adalbertocoloma-nebot@edem.es,27.0
...,...,...,...,...,...,...,...
729,male,Juan Antonio Hoz Pereira,"(Master Marketing and Digital Sales, 1)",Master Marketing and Digital Sales,1,juanantoniohozpereira@edem.es,23.0
730,male,Jordi Vicente,"(BBA in Business Administration, 2)",BBA in Business Administration,2,jordivicente@edem.es,19.0
731,male,Augusto Martin-Cabrero,"(BBA in Business Administration, 4)",BBA in Business Administration,4,augustomartin-cabrero@edem.es,19.0
732,male,Isaac Aramburu Conesa,"(MBA Junior, 1)",MBA Junior,1,isaacaramburuconesa@edem.es,23.0


Arrange the "users_df" columns

In [22]:
users_df.columns

Index(['gender', 'name', 'prog_year', 'programme', 'year', 'email', 'age'], dtype='object')

In [23]:
users_df = users_df[["name", "gender", "age", "year", "email", "programme"]]
users_df

Unnamed: 0,name,gender,age,year,email,programme
0,Maricruz Montserrat Cantón,female,19.0,4,maricruzmontserratcantón@edem.es,BBA in Business Administration
1,Esther Gisbert Saldaña,female,18.0,1,esthergisbertsaldaña@edem.es,BSc in Engineering and Management
2,Soraya Castañeda Barriga,female,18.0,1,sorayacastañedabarriga@edem.es,BBA in Business Administration
3,Juan Bautista Blazquez Valderrama,male,18.0,4,juanbautistablazquezvalderrama@edem.es,BSc in Engineering and Management
4,Adalberto Coloma-Nebot,male,27.0,1,adalbertocoloma-nebot@edem.es,Master Marketing and Digital Sales
...,...,...,...,...,...,...
729,Juan Antonio Hoz Pereira,male,23.0,1,juanantoniohozpereira@edem.es,Master Marketing and Digital Sales
730,Jordi Vicente,male,19.0,2,jordivicente@edem.es,BBA in Business Administration
731,Augusto Martin-Cabrero,male,19.0,4,augustomartin-cabrero@edem.es,BBA in Business Administration
732,Isaac Aramburu Conesa,male,23.0,1,isaacaramburuconesa@edem.es,MBA Junior


Double check whether the "users_df" data is aligned with the data provided by EDEM

In [24]:
users_df.groupby("programme")["name"].count()

programme
BBA in Business Administration        373
BSc in Engineering and Management     167
Bootcamp Cibersecurity                 10
Bootcamp Data Science                  20
Bootcamp Full Stack                    12
Bootcamp UX/ UI                         7
MBA Junior                             31
Master Data Analytics                  29
Master Finance                         27
Master Marketing and Digital Sales     58
Name: name, dtype: int64

In [25]:
users_df.groupby(["programme", "year"])["name"].count()

programme                           year
BBA in Business Administration      1       122
                                    2        89
                                    3        85
                                    4        77
BSc in Engineering and Management   1        64
                                    2        40
                                    3        24
                                    4        39
Bootcamp Cibersecurity              1        10
Bootcamp Data Science               1        20
Bootcamp Full Stack                 1        12
Bootcamp UX/ UI                     1         7
MBA Junior                          1        31
Master Data Analytics               1        29
Master Finance                      1        27
Master Marketing and Digital Sales  1        58
Name: name, dtype: int64

In [26]:
users_df.groupby("gender")["name"].count()

gender
female    253
male      481
Name: name, dtype: int64

In [27]:
users_df.groupby(["programme", "year", "gender"])["name"].count()

programme                           year  gender
BBA in Business Administration      1     female    32
                                          male      90
                                    2     female    30
                                          male      59
                                    3     female    29
                                          male      56
                                    4     female    30
                                          male      47
BSc in Engineering and Management   1     female    24
                                          male      40
                                    2     female    14
                                          male      26
                                    3     female     9
                                          male      15
                                    4     female    13
                                          male      26
Bootcamp Cibersecurity              1     female     2
                

Comment:
- The number of students per gender that was randomly generated, is the same as the one provided by EDEM, but when we tried to break it down based on the programmes/majors and year of study, there is a slight difference in the figures generated randomly (distribution of the data). 
- Overall, the random data is quite aligned with the data provided by EDEM. 

### Create "interests" column

In [30]:
query = "SELECT * FROM categories"
categories_df = check_db(query)
categories_df

Unnamed: 0,category_id,name
0,1,Finance and Investment
1,2,Management Skills
2,3,Technology
3,4,Business Management
4,5,Marketing


In [33]:
categories_df["name"].to_list()

['Finance and Investment',
 'Management Skills',
 'Technology',
 'Business Management',
 'Marketing']

In [36]:
interests = categories_df["name"].to_list()

# Set the seed for the random module
random.seed(seed_value)

users_df["categories"] = users_df.apply(lambda row: random.choices(interests, k=random.randint(1, len(interests))), axis=1)

users_df

Unnamed: 0,name,gender,age,year,email,programme,catgories,categories
0,Maricruz Montserrat Cantón,female,19.0,4,maricruzmontserratcantón@edem.es,BBA in Business Administration,[Finance and Investment],[Finance and Investment]
1,Esther Gisbert Saldaña,female,18.0,1,esthergisbertsaldaña@edem.es,BSc in Engineering and Management,"[Management Skills, Finance and Investment, Fi...","[Management Skills, Finance and Investment, Fi..."
2,Soraya Castañeda Barriga,female,18.0,1,sorayacastañedabarriga@edem.es,BBA in Business Administration,"[Finance and Investment, Technology, Finance a...","[Finance and Investment, Technology, Finance a..."
3,Juan Bautista Blazquez Valderrama,male,18.0,4,juanbautistablazquezvalderrama@edem.es,BSc in Engineering and Management,[Technology],[Technology]
4,Adalberto Coloma-Nebot,male,27.0,1,adalbertocoloma-nebot@edem.es,Master Marketing and Digital Sales,"[Technology, Technology, Management Skills, Ma...","[Technology, Technology, Management Skills, Ma..."
...,...,...,...,...,...,...,...,...
729,Juan Antonio Hoz Pereira,male,23.0,1,juanantoniohozpereira@edem.es,Master Marketing and Digital Sales,"[Management Skills, Technology, Finance and In...","[Management Skills, Technology, Finance and In..."
730,Jordi Vicente,male,19.0,2,jordivicente@edem.es,BBA in Business Administration,"[Management Skills, Business Management]","[Management Skills, Business Management]"
731,Augusto Martin-Cabrero,male,19.0,4,augustomartin-cabrero@edem.es,BBA in Business Administration,"[Management Skills, Management Skills, Marketi...","[Management Skills, Management Skills, Marketi..."
732,Isaac Aramburu Conesa,male,23.0,1,isaacaramburuconesa@edem.es,MBA Junior,"[Business Management, Management Skills, Finan...","[Business Management, Management Skills, Finan..."


### Create "user_id" column

In [38]:
users_df.columns

Index(['name', 'gender', 'age', 'year', 'email', 'programme', 'categories'], dtype='object')

In [39]:
# Add "student_id" column into "students_df" dataframe. This column will be a primary key in SQL and will be filled in automatically in SQL
users_df["user_id"] = [x for x in range (1, 735)]

# Put the "student_id" in the first column
users_df = users_df[['user_id', 'name', 'gender', 'age', 'year', 'email', 'programme', 'categories']]

users_df.head()

Unnamed: 0,user_id,name,gender,age,year,email,programme,categories
0,1,Maricruz Montserrat Cantón,female,19.0,4,maricruzmontserratcantón@edem.es,BBA in Business Administration,[Finance and Investment]
1,2,Esther Gisbert Saldaña,female,18.0,1,esthergisbertsaldaña@edem.es,BSc in Engineering and Management,"[Management Skills, Finance and Investment, Fi..."
2,3,Soraya Castañeda Barriga,female,18.0,1,sorayacastañedabarriga@edem.es,BBA in Business Administration,"[Finance and Investment, Technology, Finance a..."
3,4,Juan Bautista Blazquez Valderrama,male,18.0,4,juanbautistablazquezvalderrama@edem.es,BSc in Engineering and Management,[Technology]
4,5,Adalberto Coloma-Nebot,male,27.0,1,adalbertocoloma-nebot@edem.es,Master Marketing and Digital Sales,"[Technology, Technology, Management Skills, Ma..."


In [40]:
users_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 734 entries, 0 to 733
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   user_id     734 non-null    int64  
 1   name        734 non-null    object 
 2   gender      734 non-null    object 
 3   age         734 non-null    float64
 4   year        734 non-null    int64  
 5   email       734 non-null    object 
 6   programme   734 non-null    object 
 7   categories  734 non-null    object 
dtypes: float64(1), int64(2), object(5)
memory usage: 46.0+ KB


In [41]:
# Change "age" column dtype
users_df = users_df.astype({"age": "int64"})

users_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 734 entries, 0 to 733
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   user_id     734 non-null    int64 
 1   name        734 non-null    object
 2   gender      734 non-null    object
 3   age         734 non-null    int64 
 4   year        734 non-null    int64 
 5   email       734 non-null    object
 6   programme   734 non-null    object
 7   categories  734 non-null    object
dtypes: int64(3), object(5)
memory usage: 46.0+ KB


In [42]:
# Save the dataframe in CSV for ML training purpose
users_df.to_csv("data_ds/users_df.csv")

### Convert the "users_df" dataframe into "users" table in PostgreSQL

In [43]:
# Convert the dataframe to SQL table
conn = create_engine(url).connect()
users_df.to_sql(name="users", con=conn, index=False)

734

In [44]:
check_db("SELECT * FROM users")

Unnamed: 0,user_id,name,gender,age,year,email,programme,categories
0,1,Maricruz Montserrat Cantón,female,19,4,maricruzmontserratcantón@edem.es,BBA in Business Administration,"{""Finance and Investment""}"
1,2,Esther Gisbert Saldaña,female,18,1,esthergisbertsaldaña@edem.es,BSc in Engineering and Management,"{""Management Skills"",""Finance and Investment"",..."
2,3,Soraya Castañeda Barriga,female,18,1,sorayacastañedabarriga@edem.es,BBA in Business Administration,"{""Finance and Investment"",Technology,""Finance ..."
3,4,Juan Bautista Blazquez Valderrama,male,18,4,juanbautistablazquezvalderrama@edem.es,BSc in Engineering and Management,{Technology}
4,5,Adalberto Coloma-Nebot,male,27,1,adalbertocoloma-nebot@edem.es,Master Marketing and Digital Sales,"{Technology,Technology,""Management Skills"",Mar..."
...,...,...,...,...,...,...,...,...
729,730,Juan Antonio Hoz Pereira,male,23,1,juanantoniohozpereira@edem.es,Master Marketing and Digital Sales,"{""Management Skills"",Technology,""Finance and I..."
730,731,Jordi Vicente,male,19,2,jordivicente@edem.es,BBA in Business Administration,"{""Management Skills"",""Business Management""}"
731,732,Augusto Martin-Cabrero,male,19,4,augustomartin-cabrero@edem.es,BBA in Business Administration,"{""Management Skills"",""Management Skills"",Marke..."
732,733,Isaac Aramburu Conesa,male,23,1,isaacaramburuconesa@edem.es,MBA Junior,"{""Business Management"",""Management Skills"",""Fi..."


The following line codes have been executed in PostgreSQL editor via DBeaver sofware. No need to execute here in Python

In [36]:
# # Change the "user_id" type as serial for autoincrement ID
# query = """
#     CREATE SEQUENCE my_serial AS integer START 735 OWNED BY users.user_id;

#     ALTER TABLE users ALTER COLUMN user_id SET DEFAULT nextval('my_serial');
# """
# update_db(query)

In [None]:
# Set "user_id" as primary key
# query = """
#     ALTER TABLE users ADD PRIMARY KEY (user_id)
# """
# update_db(query)

In [36]:
# # Set "NOT NULL" for each columns
# query = """
#     ALTER TABLE public.users ALTER COLUMN name SET NOT NULL;
#     ALTER TABLE public.users ALTER COLUMN gender SET NOT NULL;
#     ALTER TABLE public.users ALTER COLUMN age SET NOT NULL;
#     ALTER TABLE public.users ALTER COLUMN "year" SET NOT NULL;
#     ALTER TABLE public.users ALTER COLUMN email SET NOT NULL;
#     ALTER TABLE public.users ALTER COLUMN programme SET NOT NULL;
#     ALTER TABLE public.users ALTER COLUMN categories SET NOT NULL;
# """
# update_db(query)

In [None]:
# # Set "category_id" as foreign key
# query = """
#     ALTER TABLE users
#     ADD CONSTRAINT category_id_fk_users_categories FOREIGN KEY (category_id) REFERENCES categories(category_id);
# """
# update_db(query)