In [54]:
# Import necessary libraries
import pandas as pd
import sqlite3
import os

# Directory for the CSV files
directory = '/Users/sarahrwolberg/Desktop/UPENN_Data_Science_Bootcamp/Module_9/sql_challenge/EmployeeSQL'

# Confirm directory existence
if os.path.exists(directory):
    print("Directory exists. Proceeding with file loading.")
else:
    print("Directory does not exist. Please check the path.")



Directory exists. Proceeding with file loading.


In [55]:
# Define file paths
departments_path = os.path.join(directory, 'departments.csv')
dept_manager_path = os.path.join(directory, 'dept_manager.csv')
dept_emp_path=os.path.join(directory, 'dept_emp.csv')
employees_path = os.path.join(directory, 'employees.csv')
salaries_path = os.path.join(directory, 'salaries.csv')
titles_path = os.path.join(directory, 'titles.csv')




In [56]:
pd.read_csv(departments_path)

Unnamed: 0,dept_no,dept_name
0,d001,Marketing
1,d002,Finance
2,d003,Human Resources
3,d004,Production
4,d005,Development
5,d006,Quality Management
6,d007,Sales
7,d008,Research
8,d009,Customer Service


In [57]:
# Load CSV files into DataFrames
departments_df = pd.read_csv(departments_path)
dept_emp_df = pd.read_csv(dept_emp_path)
dept_manager_df = pd.read_csv(dept_manager_path)
employees_df = pd.read_csv(employees_path)
salaries_df = pd.read_csv(salaries_path)
titles_df = pd.read_csv(titles_path)

# Preview one of the DataFrames to ensure proper loading
departments_df.head()

Unnamed: 0,dept_no,dept_name
0,d001,Marketing
1,d002,Finance
2,d003,Human Resources
3,d004,Production
4,d005,Development


In [58]:
#Create a SQL Database
# Create a connection to SQLite
connection = sqlite3.connect("employee_database.db")

# Function to save a DataFrame as a table
def save_to_sqlite(df, table_name, conn):
    df.to_sql(table_name, conn, if_exists='replace', index=False)

# Save DataFrames to SQLite
save_to_sqlite(departments_df, 'departments', connection)
save_to_sqlite(dept_emp_df, 'dept_emp', connection)
save_to_sqlite(dept_manager_df, 'dept_manager', connection)
save_to_sqlite(employees_df, 'employees', connection)
save_to_sqlite(salaries_df, 'salaries', connection)
save_to_sqlite(titles_df, 'titles', connection)

# Verify tables
print("Tables in database:", pd.read_sql("SELECT name FROM sqlite_master WHERE type='table';", connection))


Tables in database:            name
0   departments
1      dept_emp
2  dept_manager
3     employees
4      salaries
5        titles


In [59]:
#Employee details and slalaries
query_1 = """
SELECT employees.emp_no, employees.last_name, employees.first_name, employees.sex, salaries.salary
FROM employees
JOIN salaries ON employees.emp_no = salaries.emp_no;
"""
result_1 = pd.read_sql(query_1, connection)
result_1.head()



Unnamed: 0,emp_no,last_name,first_name,sex,salary
0,473302,Zallocco,Hideyuki,M,40000
1,475053,Delgrande,Byong,F,53422
2,57444,Babb,Berry,F,48973
3,421786,Verhoeff,Xiong,M,40000
4,282238,Baumann,Abdelkader,F,40000


In [60]:
#Query 2: Employees hired in 1986
query_2 = """
SELECT first_name, last_name, hire_date
FROM employees
WHERE hire_date LIKE '1986%';
"""
result_2 = pd.read_sql(query_2, connection)
result_2.head()


Unnamed: 0,first_name,last_name,hire_date


In [61]:
#Query 3: Managers and their departments
query_3 = """
SELECT dept_manager.dept_no, departments.dept_name, employees.emp_no, employees.last_name, employees.first_name
FROM dept_manager
JOIN departments ON dept_manager.dept_no = departments.dept_no
JOIN employees ON dept_manager.emp_no = employees.emp_no;
"""
result_3 = pd.read_sql(query_3, connection)
result_3.head()



Unnamed: 0,dept_no,dept_name,emp_no,last_name,first_name
0,d001,Marketing,110022,Markovitch,Margareta
1,d001,Marketing,110039,Minakawa,Vishwani
2,d002,Finance,110085,Alpin,Ebru
3,d002,Finance,110114,Legleitner,Isamu
4,d003,Human Resources,110183,Ossenbruggen,Shirish


In [62]:
#Query 4: Employees and departments
query_4 = """
SELECT dept_emp.dept_no, employees.emp_no, employees.last_name, employees.first_name, departments.dept_name
FROM dept_emp
JOIN employees ON dept_emp.emp_no = employees.emp_no
JOIN departments ON dept_emp.dept_no = departments.dept_no;
"""
result_4 = pd.read_sql(query_4, connection)
result_4.head()


Unnamed: 0,dept_no,emp_no,last_name,first_name,dept_name
0,d005,10001,Facello,Georgi,Development
1,d007,10002,Simmel,Bezalel,Sales
2,d004,10003,Bamford,Parto,Production
3,d004,10004,Koblick,Chirstian,Production
4,d003,10005,Maliniak,Kyoichi,Human Resources


In [63]:
#Data Analysis Section

In [64]:

#1. List the employee number, last name, first name, sex, and salary of each employee
# Query 1: Employee details and salaries
query_1 = """
SELECT employees.emp_no, employees.last_name, employees.first_name, employees.sex, salaries.salary
FROM employees
JOIN salaries ON employees.emp_no = salaries.emp_no;
"""

# Execute the query and display the results
result_1 = pd.read_sql(query_1, connection)
display(result_1)




Unnamed: 0,emp_no,last_name,first_name,sex,salary
0,473302,Zallocco,Hideyuki,M,40000
1,475053,Delgrande,Byong,F,53422
2,57444,Babb,Berry,F,48973
3,421786,Verhoeff,Xiong,M,40000
4,282238,Baumann,Abdelkader,F,40000
...,...,...,...,...,...
300019,464231,Eastman,Constantino,M,69696
300020,255832,Dayang,Yuping,F,75355
300021,76671,Plessier,Ortrud,M,61886
300022,264920,Samarati,Percy,F,62772


In [65]:
#2. List the first name, last name, and hire date for the employees who were hired in 1986
# Query 2: Employees hired in 1986
query_2 = """
SELECT first_name, last_name, hire_date
FROM employees
WHERE hire_date LIKE '1986%';
"""

# Execute the query and display the results
result_2 = pd.read_sql(query_2, connection)
display(result_2)



Unnamed: 0,first_name,last_name,hire_date


In [66]:

#3. List the manager of each department along with their department number, department name, employee number, last name, and first name
# Query 3: Managers and their departments
query_3 = """
SELECT dept_manager.dept_no, departments.dept_name, employees.emp_no, employees.last_name, employees.first_name
FROM dept_manager
JOIN departments ON dept_manager.dept_no = departments.dept_no
JOIN employees ON dept_manager.emp_no = employees.emp_no;
"""

# Execute the query and display the results
result_3 = pd.read_sql(query_3, connection)
display(result_3)



Unnamed: 0,dept_no,dept_name,emp_no,last_name,first_name
0,d001,Marketing,110022,Markovitch,Margareta
1,d001,Marketing,110039,Minakawa,Vishwani
2,d002,Finance,110085,Alpin,Ebru
3,d002,Finance,110114,Legleitner,Isamu
4,d003,Human Resources,110183,Ossenbruggen,Shirish
5,d003,Human Resources,110228,Sigstam,Karsten
6,d004,Production,110303,Wegerle,Krassimir
7,d004,Production,110344,Cools,Rosine
8,d004,Production,110386,Kieras,Shem
9,d004,Production,110420,Ghazalie,Oscar


In [67]:
#4. List the department number for each employee along with their employee number, last name, first name, and department name
# Query 4: Employees and their departments
query_4 = """
SELECT dept_emp.dept_no, employees.emp_no, employees.last_name, employees.first_name, departments.dept_name
FROM dept_emp
JOIN employees ON dept_emp.emp_no = employees.emp_no
JOIN departments ON dept_emp.dept_no = departments.dept_no;
"""

# Execute the query and display the results
result_4 = pd.read_sql(query_4, connection)
display(result_4)



Unnamed: 0,dept_no,emp_no,last_name,first_name,dept_name
0,d005,10001,Facello,Georgi,Development
1,d007,10002,Simmel,Bezalel,Sales
2,d004,10003,Bamford,Parto,Production
3,d004,10004,Koblick,Chirstian,Production
4,d003,10005,Maliniak,Kyoichi,Human Resources
...,...,...,...,...,...
331598,d004,499995,Lichtner,Dekang,Production
331599,d004,499996,Baaz,Zito,Production
331600,d005,499997,Lenart,Berhard,Development
331601,d002,499998,Breugel,Patricia,Finance


In [68]:
#5. List first name, last name, and sex of each employee whose first name is Hercules and whose last name begins with the letter B
# Query 5: Employees whose first name is Hercules and last name begins with 'B'
query_5 = """
SELECT first_name, last_name, sex
FROM employees
WHERE first_name = 'Hercules' AND last_name LIKE 'B%';
"""

# Execute the query and display the results
result_5 = pd.read_sql(query_5, connection)
display(result_5)



Unnamed: 0,first_name,last_name,sex
0,Hercules,Baer,M
1,Hercules,Biron,F
2,Hercules,Birge,F
3,Hercules,Berstel,F
4,Hercules,Bernatsky,M
5,Hercules,Bail,F
6,Hercules,Bodoff,M
7,Hercules,Benantar,F
8,Hercules,Basagni,M
9,Hercules,Bernardinello,F


In [69]:
#6. List each employee in the Sales department, including their employee number, last name, and first name
# Query 6: Employees in the Sales department
query_6 = """
SELECT employees.emp_no, employees.last_name, employees.first_name
FROM employees
JOIN dept_emp ON employees.emp_no = dept_emp.emp_no
JOIN departments ON dept_emp.dept_no = departments.dept_no
WHERE departments.dept_name = 'Sales';
"""

# Execute the query and display the results
result_6 = pd.read_sql(query_6, connection)
display(result_6)


Unnamed: 0,emp_no,last_name,first_name
0,10002,Simmel,Bezalel
1,10016,Cappelletti,Kazuhito
2,10034,Swan,Bader
3,10041,Lenart,Uri
4,10050,Dredge,Yinghua
...,...,...,...
52240,499976,Felder,Guozhong
52241,499980,Usery,Gino
52242,499986,Ranta,Nathan
52243,499987,Dusink,Rimli


In [70]:
#7. List each employee in the Sales and Development departments, including their employee number, last name, first name, and department name
# Query 7: Employees in Sales and Development departments
query_7 = """
SELECT employees.emp_no, employees.last_name, employees.first_name, departments.dept_name
FROM employees
JOIN dept_emp ON employees.emp_no = dept_emp.emp_no
JOIN departments ON dept_emp.dept_no = departments.dept_no
WHERE departments.dept_name IN ('Sales', 'Development');
"""

# Execute the query and display the results
result_7 = pd.read_sql(query_7, connection)
display(result_7)



Unnamed: 0,emp_no,last_name,first_name,dept_name
0,10001,Facello,Georgi,Development
1,10006,Preusig,Anneke,Development
2,10008,Kalloufi,Saniya,Development
3,10012,Bridgland,Patricio,Development
4,10014,Genin,Berni,Development
...,...,...,...,...
137947,499976,Felder,Guozhong,Sales
137948,499980,Usery,Gino,Sales
137949,499986,Ranta,Nathan,Sales
137950,499987,Dusink,Rimli,Sales


In [71]:
#8. List the frequency counts, in descending order, of all the employee last names
# Query 8: Frequency counts of employee last names in descending order
query_8 = """
SELECT last_name, COUNT(last_name) AS name_count
FROM employees
GROUP BY last_name
ORDER BY name_count DESC;
"""

# Execute the query and display the results
result_8 = pd.read_sql(query_8, connection)
display(result_8)



Unnamed: 0,last_name,name_count
0,Baba,226
1,Gelosh,223
2,Coorg,223
3,Sudbeck,222
4,Farris,222
...,...,...
1633,Guardalben,148
1634,Georgatos,148
1635,Merro,147
1636,Sadowsky,145


In [None]:
#Table Schemas

In [74]:
#Table: departments

# Establish a connection to SQLite database
connection = sqlite3.connect("employee_database.db")
cursor = connection.cursor()

# Create the `departments` table
create_departments_table = """
CREATE TABLE IF NOT EXISTS departments (
    dept_no CHAR(4) PRIMARY KEY,
    dept_name VARCHAR(40) NOT NULL
);
"""

# Execute the SQL command
cursor.execute(create_departments_table)

# Commit the changes and close the cursor
connection.commit()
cursor.close()

# Verify the table creation
print("Table `departments` created successfully.")



Table `departments` created successfully.


In [72]:
#Step 5: Save Queries to .sql File
queries = """
-- Query 1
SELECT employees.emp_no, employees.last_name, employees.first_name, employees.sex, salaries.salary
FROM employees
JOIN salaries ON employees.emp_no = salaries.emp_no;

-- Query 2
SELECT first_name, last_name, hire_date
FROM employees
WHERE hire_date LIKE '1986%';

-- Query 3
SELECT dept_manager.dept_no, departments.dept_name, employees.emp_no, employees.last_name, employees.first_name
FROM dept_manager
JOIN departments ON dept_manager.dept_no = departments.dept_no
JOIN employees ON dept_manager.emp_no = employees.emp_no;

-- Query 4
SELECT dept_emp.dept_no, employees.emp_no, employees.last_name, employees.first_name, departments.dept_name
FROM dept_emp
JOIN employees ON dept_emp.emp_no = employees.emp_no
JOIN departments ON dept_emp.dept_no = departments.dept_no;
"""

with open("challenge_9_queries.sql", "w") as file:
    file.write(queries)
