## Advanced SQL Puzzles with Pandas

Solving and enjoing SQL puzzles wtih Pandas! <br>
As author said:
>Working through these puzzles will give you an understanding of the SQL language and what types of
problems the SQL language best solves.

From 4 notebooks, __Pandas__ in the easiest one. The framework have a lot of tools, methods and good documentation.<br>
Sometimes tasks that should be difficult are solved by 2 functions.

__[More about puzzles and author](https://advancedsqlpuzzles.com/)__

### Setup

In [1]:
import numpy as np
import pandas as pd

In [2]:
!python --version

Python 3.9.7


In [3]:
np.__version__

'1.20.3'

In [4]:
pd.__version__

'1.3.2'

### Puzzle 1

In [7]:
d11 = {'item': ['Sugar', 'Bread', 'Juice', 'Soda', 'Flour']}
df11 = pd.DataFrame(data=d11)

In [8]:
d12 = {'item': ['Sugar', 'Bread', 'Butter', 'Cheese', 'Fruit']}
df12 = pd.DataFrame(data=d12)

In [9]:
df11

Unnamed: 0,item
0,Sugar
1,Bread
2,Juice
3,Soda
4,Flour


In [10]:
df12

Unnamed: 0,item
0,Sugar
1,Bread
2,Butter
3,Cheese
4,Fruit


In [12]:
df11['Item Cart 1'] = df11['item']

In [16]:
df11.set_index('item')

Unnamed: 0_level_0,Item Cart 1
item,Unnamed: 1_level_1
Sugar,Sugar
Bread,Bread
Juice,Juice
Soda,Soda
Flour,Flour


In [19]:
df12['Item Cart 2'] = df12['item']

In [20]:
df12.set_index('item')

Unnamed: 0_level_0,Item Cart 2
item,Unnamed: 1_level_1
Sugar,Sugar
Bread,Bread
Butter,Butter
Cheese,Cheese
Fruit,Fruit


In [22]:
df11.merge(df12, how="outer")[['Item Cart 1', 'Item Cart 2']]

Unnamed: 0,Item Cart 1,Item Cart 2
0,Sugar,Sugar
1,Bread,Bread
2,Juice,
3,Soda,
4,Flour,
5,,Butter
6,,Cheese
7,,Fruit


### Puzzle 2

In [24]:
d2 = {'Employee ID': [1001, 2002, 3003, 4004, 5005, 6006, 7007],
     'Manager ID': [np.nan, 1001, 1001, 2002, 2002, 2002, 5005],
     'Job Title': ['President', 'Director', 'Office Manager', 'Engineer', 'Engineer', 'Engineer', "Intern"]}
df2 = pd.DataFrame(data=d2, dtype=np.int16)

  df2 = pd.DataFrame(data=d2, dtype=np.int16)


In [26]:
df2.head()

Unnamed: 0,Employee ID,Manager ID,Job Title
0,1001,,President
1,2002,1001.0,Director
2,3003,1001.0,Office Manager
3,4004,2002.0,Engineer
4,5005,2002.0,Engineer


In [29]:
df2[df2["Employee ID"] == 1001]["Manager ID"].values.any()

nan

In [30]:
df2[df2["Employee ID"] == 2002]["Manager ID"].values.any()

1001

In [36]:
def checkDeep(EmployeeID, c):
    if df2[df2["Employee ID"] == EmployeeID]["Manager ID"].isnull().values.any():
        return c
    else:
        c = checkDeep(df2[df2["Employee ID"] == EmployeeID]["Manager ID"].values.any(), c+1)
        return c

In [37]:
df2['Depth'] = df2['Employee ID'].apply(lambda x: checkDeep(x, c=0))

In [39]:
df2.head()

Unnamed: 0,Employee ID,Manager ID,Job Title,Depth
0,1001,,President,0
1,2002,1001.0,Director,1
2,3003,1001.0,Office Manager,1
3,4004,2002.0,Engineer,2
4,5005,2002.0,Engineer,2


### Puzzle 3

In [42]:
d3 = {'Customer ID': [1001, 1001, 1001, 1001, 2002, 3003, 3003, 4004],
     'Order ID ': [1, 2, 3, 4, 5, 6, 7, 8],
     'Delivery State': ['CA', 'TX', 'TX', 'TX', 'WA', 'CA', "CA", 'TX'],
     'Amount': ["$340", '$950', '$670', '$860', '$320', '$650', '$830', '$120']}
df3 = pd.DataFrame(data=d3, dtype=np.int16)

  df3 = pd.DataFrame(data=d3, dtype=np.int16)


In [43]:
df3.head()

Unnamed: 0,Customer ID,Order ID,Delivery State,Amount
0,1001,1,CA,$340
1,1001,2,TX,$950
2,1001,3,TX,$670
3,1001,4,TX,$860
4,2002,5,WA,$320


In [44]:
df3[df3['Delivery State'] == 'TX'].head(len(df3[df3['Delivery State'] == 'CA']))

Unnamed: 0,Customer ID,Order ID,Delivery State,Amount
1,1001,2,TX,$950
2,1001,3,TX,$670
3,1001,4,TX,$860


### Puzzle 4

In [45]:
d4 = {'Customer ID': [1001, 1001, 1001, 2002, 2002, 3003],
     'Type': ["Cellular", "Work", "Home", "Cellular", "Work", "Cellular"],
     'Phone Number': ['555-897-5421', '555-897-6542', '555-698-9874', '555-963-6544', '555-812-9856', '555-987-6541']}
df4 = pd.DataFrame(data=d4, dtype=np.int16)

  df4 = pd.DataFrame(data=d4, dtype=np.int16)


In [46]:
df4.head()

Unnamed: 0,Customer ID,Type,Phone Number
0,1001,Cellular,555-897-5421
1,1001,Work,555-897-6542
2,1001,Home,555-698-9874
3,2002,Cellular,555-963-6544
4,2002,Work,555-812-9856


In [47]:
df4.pivot(index='Customer ID', columns='Type', values='Phone Number')

Type,Cellular,Home,Work
Customer ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,555-897-5421,555-698-9874,555-897-6542
2002,555-963-6544,,555-812-9856
3003,555-987-6541,,


### Puzzle 7

In [48]:
d7 = {'Customer ID': [1001, 1001, 1001, 1001, 2002, 2002, 2002, 3003, 3003, 4004],
     'Type': ["Geologist", "Astrogator", "Biochemist", "Technician", "Surgeon", "Machinist", "Geologist", "Geologist", "Astrogator", "Selenologist"]}
df7 = pd.DataFrame(data=d7, dtype=np.int16)

  df7 = pd.DataFrame(data=d7, dtype=np.int16)


In [51]:
df7.head(10)

Unnamed: 0,Customer ID,Type
0,1001,Geologist
1,1001,Astrogator
2,1001,Biochemist
3,1001,Technician
4,2002,Surgeon
5,2002,Machinist
6,2002,Geologist
7,3003,Geologist
8,3003,Astrogator
9,4004,Selenologist


In [49]:
d7 = {'Description': ["Geologist", "Astrogator", "Technician"]}
requrements = pd.DataFrame(data=d7, dtype=np.int16)

  requrements = pd.DataFrame(data=d7, dtype=np.int16)


In [52]:
requrements.head()

Unnamed: 0,Description
0,Geologist
1,Astrogator
2,Technician


In [53]:
requrements['Description'].values

array(['Geologist', 'Astrogator', 'Technician'], dtype=object)

### Puzzle 8

In [55]:
d8 = {'Workflow': ["Alpha", "Bravo", "Charlie", "Delta"],
     'Case 1': [0, 0, 1, 0],
     'Case 2': [0, 1, 0, 0],
     'Case 3': [0, 1, 0, 0]}
df8 = pd.DataFrame(data=d8, dtype=np.int16)

  df8 = pd.DataFrame(data=d8, dtype=np.int16)


In [56]:
df8['Passed'] = df8['Case 1'] +  df8['Case 2'] + df8['Case 3']

In [57]:
dfResult = df8[['Workflow', 'Passed']]

In [58]:
dfResult.head()

Unnamed: 0,Workflow,Passed
0,Alpha,0
1,Bravo,2
2,Charlie,1
3,Delta,0


### Puzzle 10

In [59]:
(5),(6),(10),(10),(13),
(14),(17),(20),(81),(90),(76);

In [60]:
r = [5, 6, 10, 10, 13, 14, 17, 20, 81, 90, 76]
ser = pd.Series(r, copy=False)

In [61]:
ser.mean()

31.09090909090909

In [62]:
ser.median()

14.0

In [63]:
ser.mode()

0    10
dtype: int64

### Puzzle 11

In [65]:
d11 = {'Test Case': ["A", "B", "C"]}
df11 = pd.DataFrame(data=d11)

In [66]:
val = df11['Test Case'].values

In [67]:
from itertools import permutations

perm = permutations(val)
valuePerm = [",".join([y for y in x]) for x in perm]

In [68]:
valuePerm

['A,B,C', 'A,C,B', 'B,A,C', 'B,C,A', 'C,A,B', 'C,B,A']

In [69]:
d = {'Test Case': valuePerm}
df = pd.DataFrame(data=d)

In [70]:
df.head()

Unnamed: 0,Test Case
0,"A,B,C"
1,"A,C,B"
2,"B,A,C"
3,"B,C,A"
4,"C,A,B"


### Puzzle 12

In [71]:
d12 = {'Workflow': ["Alpha", "Alpha", "Alpha", "Bravo", "Bravo", "Bravo", "Charlie", "Charlie", "Charlie"],
     'Execution Date': ["6/1/2018", "6/14/2018", "6/15/2018", "6/1/2018", "6/2/2018", "6/19/2018", "6/1/2018", "6/15/2018", "6/30/2018"]}
df12 = pd.DataFrame(data=d12)
df12['Execution Date'] = pd.to_datetime(df12['Execution Date'])

In [72]:
df12.head(10)

Unnamed: 0,Workflow,Execution Date
0,Alpha,2018-06-01
1,Alpha,2018-06-14
2,Alpha,2018-06-15
3,Bravo,2018-06-01
4,Bravo,2018-06-02
5,Bravo,2018-06-19
6,Charlie,2018-06-01
7,Charlie,2018-06-15
8,Charlie,2018-06-30


In [73]:
def avgTime(x):
    diff = x.diff().dropna().dt.days.mean()
    return int(diff)

In [74]:
dfAgg = df12.groupby('Workflow')[['Execution Date']].agg(avgTime)

In [75]:
dfAgg

Unnamed: 0_level_0,Execution Date
Workflow,Unnamed: 1_level_1
Alpha,7
Bravo,9
Charlie,14


### Puzzle 13

In [76]:
d13 = {'Date': ["7/1/2018", "7/2/2018", "7/3/2018", "7/4/2018", "7/5/2018"],
     'Quantity Adjustment': [100, 75, -150, 50, -100]}
df13 = pd.DataFrame(data=d13)
df13['Date'] = pd.to_datetime(df13['Date'])

In [78]:
df13.head(10)

Unnamed: 0,Date,Quantity Adjustment
0,2018-07-01,100
1,2018-07-02,75
2,2018-07-03,-150
3,2018-07-04,50
4,2018-07-05,-100


In [79]:
df13['Inventory'] = df13['Quantity Adjustment'].cumsum()

In [80]:
df13.head()

Unnamed: 0,Date,Quantity Adjustment,Inventory
0,2018-07-01,100,100
1,2018-07-02,75,175
2,2018-07-03,-150,25
3,2018-07-04,50,75
4,2018-07-05,-100,-25


### Puzzle 14

In [81]:
d14 = {'Workflow': ["Alpha", "Alpha", "Alpha", "Bravo", "Bravo", "Charlie", "Charlie", "Delta", "Delta", "Echo", "Echo"],
     'Step Number': [1, 2, 3, 1, 2, 1, 2, 1, 2, 1, 2],
     'Execution Date': ["Error", "Complete", "Running", "Complete", "Complete", "Running", "Running", "Error", "Error", "Running", "Complete"]}
df14 = pd.DataFrame(data=d14)

In [88]:
df14

Unnamed: 0,Workflow,Step Number,Execution Date
0,Alpha,1,Error
1,Alpha,2,Complete
2,Alpha,3,Running
3,Bravo,1,Complete
4,Bravo,2,Complete
5,Charlie,1,Running
6,Charlie,2,Running
7,Delta,1,Error
8,Delta,2,Error
9,Echo,1,Running


In [84]:
def getStatus(x):
    val = x.values
    if len(set(val)) == 1:
        return val[0]
    elif "Error" in val:
        return "Indeterminate"
    else:
        return "Running"

In [86]:
df14.groupby('Workflow')[["Execution Date"]].agg(getStatus)

Unnamed: 0_level_0,Execution Date
Workflow,Unnamed: 1_level_1
Alpha,Indeterminate
Bravo,Complete
Charlie,Running
Delta,Error
Echo,Running


### Puzzle 15

In [89]:
d15 = {'Workflow': [1, 2, 3, 4, 5, 6, 7, 8, 9],
     'Step Number': ["SELECT", "Product,", "UnitPrice,", "EffectiveDate", "FROM", "Products", "WHERE", "UnitPrice", "> 100"]}
df15 = pd.DataFrame(data=d15)

In [90]:
" ".join(df['Step Number'].values)

KeyError: 'Step Number'

In [91]:
df15 = df15.agg(sum)

In [92]:
df15['Step Number']

'SELECTProduct,UnitPrice,EffectiveDateFROMProductsWHEREUnitPrice> 100'

### Puzzle 17

In [93]:
d17 = {'Product': ["Pencil", "Eraser", "Notebook"],
     'Step Number': [3, 4, 2]}
df17 = pd.DataFrame(data=d17)

In [94]:
df17.head()

Unnamed: 0,Product,Step Number
0,Pencil,3
1,Eraser,4
2,Notebook,2


In [None]:
["1"] * 3

In [96]:
df17['Step Number'] = df17['Step Number'].map(lambda x: ["1"] * x)

In [97]:
df17.explode('Step Number')

Unnamed: 0,Product,Step Number
0,Pencil,1
0,Pencil,1
0,Pencil,1
1,Eraser,1
1,Eraser,1
1,Eraser,1
1,Eraser,1
2,Notebook,1
2,Notebook,1


### Task 18

In [98]:
d18 = {'SeatingChart': [7, 13, 14, 15, 27, 28, 29, 30, 31, 32, 33, 34, 52, 53, 54]}
df18 = pd.DataFrame(data=d18)

In [99]:
df18.head(30)

Unnamed: 0,SeatingChart
0,7
1,13
2,14
3,15
4,27
5,28
6,29
7,30
8,31
9,32


In [101]:
np.arange(1, df18['SeatingChart'].max()+1)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54], dtype=int64)

In [102]:
vals = df18['SeatingChart'].values

In [None]:
dfAnother = dfAlls[~dfAlls.isin(vals)].dropna()

In [None]:
dfGroups = pd.concat([pd.DataFrame({'SeatingChart': [1]}), df], ignore_index=True)

In [None]:
dfAns = dfNew[dfNew.diff(1)['SeatingChart'] > 1]

In [None]:
dfAns

In [None]:
dfAlls = pd.DataFrame(data={'SeatingChart': np.arange(1, df['SeatingChart'].max())})

In [None]:
dfAlls

In [None]:
dfAnother.count()

In [None]:
df.isin(somewhere)

In [None]:
df[df['SeatingChart'] % 2 == 0].count()

In [None]:
df[df['SeatingChart'] % 2 == 1].count()

### Puzzle 19

### Puzzle 20

In [None]:
d = {'Product ID': [1001, 1001, 1001, 2002, 2002],
     'Effective Date': ["1/1/2018", "4/15/2018", "6/8/2018", "4/17/2018", "5/19/2018 "],
     'Unit Price': ["$1.99", "$2.99", "$3.99", "$2.99", "$1.99"]}
df = pd.DataFrame(data=d)
df['Effective Date'] = pd.to_datetime(df['Effective Date'])

In [None]:
df.loc[df.groupby('Product ID')['Effective Date'].idxmax()]

In [None]:
df.agg(selectMax, axis = 1)

In [None]:
len(["TX", "TX", "TX", "TX", "TX", "TX", "TX", "IA", "IA", "IA", "IA", "IA", "IA"])

### Puzzle 21

In [None]:
d = {'Order ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
     'Customer ID': [1001, 1001, 1001, 1001, 1001, 2002, 2002, 3003, 3003, 3003, 4004, 4004, 4004],
     'Order Date': ['1/1/2018', "1/1/2018", '1/1/2018', "2/1/2018", "3/1/2018", "2/1/2018", "2/1/2018", "1/1/2018", "2/1/2018", "3/1/2018", "4/1/2018", "5/1/2018", "5/1/2018"],
     'Amount': ["$100", "$150", "$75", "$100", "$100", "$75", "$150", "$100", "$100", "$100", "$100", "$50", "$100"],
     'State': ["TX", "TX", "TX", "TX", "TX", "TX", "TX", "IA", "IA", "IA", "IA", "IA", "IA"]}
df = pd.DataFrame(data=d)

In [None]:
df

In [None]:
"$100"[1:]

In [None]:
def moneyMean(value):
    return int(value[1:])

In [None]:
df["Amount"] = df.apply(lambda x: moneyMean(x["Amount"]), axis=1)

In [None]:
meanDF = df.groupby("Customer ID")['Amount'].mean()

In [None]:
indexVals = meanDF[meanDF > 100].index

In [None]:
df[df['Customer ID'].isin(indexVals)]

### Puzzle 22

In [None]:
d = {'Workflow': ["Bravo", "Alpha", "Charlie", "Alpha", "Bravo", "Charlie", "Alpha", "Charlie"],
     'Message': ["Error: Cannot Divide by 0", "Error: Conversion Failed", "Error: Conversion Failed", "Error: Unidentified error occurred", "Error: Unidentified error occurred", "Error: Unidentified error occurred", "Status Complete", "Status Complete"],
     'Occurrences': [3, 5, 7, 9, 1, 10, 8, 6]}
df = pd.DataFrame(data=d)

In [None]:
df

In [None]:
df.loc[df.groupby('Message')['Occurrences'].idxmax()]

### Puzzle 23

In [None]:
d = {'Player ID': [1001, 2002, 3003, 4004, 5005],
     'Score': [2343, 9432, 6548, 1054, 6832]}
df = pd.DataFrame(data=d)

In [None]:
df

In [None]:
df.sort_values(by=['Score'], ascending=False, ignore_index=True)

### Puzzle 24

In [None]:
d = {'Order ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
     'Customer ID': [1001, 1001, 1001, 1001, 1001, 2002, 2002, 3003, 3003, 3003, 4004, 4004, 4004],
     'Order Date': ['1/1/2018', "1/1/2018", '1/1/2018', "2/1/2018", "3/1/2018", "2/1/2018", "2/1/2018", "1/1/2018", "2/1/2018", "3/1/2018", "4/1/2018", "5/1/2018", "5/1/2018"],
     'Amount': ["$100", "$150", "$75", "$100", "$100", "$75", "$150", "$100", "$100", "$100", "$100", "$50", "$100"],
     'State': ["TX", "TX", "TX", "TX", "TX", "TX", "TX", "IA", "IA", "IA", "IA", "IA", "IA"]}
df = pd.DataFrame(data=d)

In [None]:
df

In [None]:
df[df['Order ID'].between(5, 10)]

### Puzzle 25

In [None]:
d = {'Order ID': [1, 2, 3, 4, 5, 6],
     'Customer ID': [1001, 1001, 1001, 2002, 2002, 2002],
     'Count': [12, 54, 32, 7, 16, 5],
     'Vendor': ['Direct Parts', 'Direct Parts', "ACME", "ACME", "ACME", "Direct Parts"]}
df = pd.DataFrame(data=d)

In [None]:
df.loc[df.groupby("Customer ID")['Count'].idxmax()][['Customer ID', 'Vendor']]

### Puzzle 26

In [None]:
d = {'Year': [2018, 2017, 2017, 2016, 2016, 2016],
     'Amount': ["$352,645", "$165,565", "$254,654", "$159,521", "$251,696", "$111,894"]}
df = pd.DataFrame(data=d)

In [None]:
def moneyMean(value):
    value = value.replace(",", "")
    return int(value[1:])

In [None]:
df['Amount'] = df['Amount'].map(lambda x: moneyMean(x))

In [None]:
df.groupby("Year").sum().transpose()

### Puzzle 27

### Puzzle 28

In [None]:
d = {'Year': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
     'Amount': ["Alpha", None, None, None, "Bravo", None, None, None, None, "Charlie", None, None],
     'Status': ['Pass', "Fail", "Fail", 'Fail', "Pass", 'Fail', "Fail", "Pass", "Pass", "Fail", "Fail", "Fail"]}
df = pd.DataFrame(data=d)

In [None]:
df

In [None]:
df.fillna(method="ffill")

### Puzzle 29

In [None]:
d29 = {'Step Number': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
       'Status': ['Passed', "Passed", "Passed", 'Passed', "Failed", 'Failed', "Failed", "Failed", "Failed", "Passed", "Passed", "Passed"]}
df29 = pd.DataFrame(data=d29)

In [None]:
df29

### Puzzle 30

### Puzzle 31

In [None]:
d31 = {'IntegerValue': [3759, 3760, 3761, 3762, 3763]}
df31 = pd.DataFrame(data=d31)

In [None]:
df31

In [None]:
df31['IntegerValue'].max()

### Puzzle #32

In [3]:
d32 = {'Spaceman ID': [1001, 2002, 3003, 4004, 5005, 6006, 7007, 8008, 9009],
       'Job Description': ['Astrogator', "Astrogator", "Astrogator", 'Geologist', "Geologist", 'Geologist', "Technician", "Technician", "Technician"],
       "Mission Count" : [6, 12, 17, 21, 9, 8, 13, 2, 7]}
df32 = pd.DataFrame(data=d32)

In [4]:
df32

Unnamed: 0,Spaceman ID,Job Description,Mission Count
0,1001,Astrogator,6
1,2002,Astrogator,12
2,3003,Astrogator,17
3,4004,Geologist,21
4,5005,Geologist,9
5,6006,Geologist,8
6,7007,Technician,13
7,8008,Technician,2
8,9009,Technician,7


#### Approach 1. With Apply. <br>
So, basically groupBy splits data by some column, then applies some operation and sticks together Series objects into new dataFrame

In [5]:
def examineData(data):
    print(type(data))
    print(data)
    #print(data.min())
    #print(type(data.min()))
    return data

In [6]:
df32.groupby("Job Description").apply(examineData)

<class 'pandas.core.frame.DataFrame'>
   Spaceman ID Job Description  Mission Count
0         1001      Astrogator              6
1         2002      Astrogator             12
2         3003      Astrogator             17
<class 'pandas.core.frame.DataFrame'>
   Spaceman ID Job Description  Mission Count
3         4004       Geologist             21
4         5005       Geologist              9
5         6006       Geologist              8
<class 'pandas.core.frame.DataFrame'>
   Spaceman ID Job Description  Mission Count
6         7007      Technician             13
7         8008      Technician              2
8         9009      Technician              7


Unnamed: 0,Spaceman ID,Job Description,Mission Count
0,1001,Astrogator,6
1,2002,Astrogator,12
2,3003,Astrogator,17
3,4004,Geologist,21
4,5005,Geologist,9
5,6006,Geologist,8
6,7007,Technician,13
7,8008,Technician,2
8,9009,Technician,7


In [7]:
def findMax(data):
    result = data[data['Mission Count'] == data['Mission Count'].max()].squeeze()
    return result

In [8]:
def findMin(data):
    result = data[data['Mission Count'] == data['Mission Count'].min()].squeeze()
    return result

In [9]:
df32.groupby("Job Description").apply(findMax)

Unnamed: 0_level_0,Spaceman ID,Job Description,Mission Count
Job Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Astrogator,3003,Astrogator,17
Geologist,4004,Geologist,21
Technician,7007,Technician,13


In [10]:
df32.groupby("Job Description").apply(findMin)

Unnamed: 0_level_0,Spaceman ID,Job Description,Mission Count
Job Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Astrogator,1001,Astrogator,6
Geologist,6006,Geologist,8
Technician,8008,Technician,2


In [11]:
dfMostLeast = df32.groupby("Job Description").apply(findMax)[['Spaceman ID']]

In [12]:
dfMostLeast = dfMostLeast.rename(columns={"Spaceman ID": "Most Experienced "})

In [13]:
dfMostLeast['Least Experienced'] = df32.groupby("Job Description").apply(findMin)[['Spaceman ID']]

In [14]:
dfMostLeast.head()

Unnamed: 0_level_0,Most Experienced,Least Experienced
Job Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Astrogator,3003,1001
Geologist,4004,6006
Technician,7007,8008


### Puzzle #33

In [7]:
d33_ = {'OrderID': [1, 2, 3],
       'Product': ['Widget', "Gizmo", "Doodad"],
       'DaysToDeliver' : [7, 3, 9]}
df33_ = pd.DataFrame(data=d33_)

In [None]:
('AA-111','Widget',7),
('BB-222','Widget',2),
('CC-333','Widget',3),
('DD-444','Widget',1),
('AA-111','Gizmo',7),
('BB-222','Gizmo',2),
('AA-111','Doodad',7),
('DD-444','Doodad',1);

In [11]:
d33 = {'PartID': ['AA-111', 'BB-222', 'CC-333', 'DD-444', 'AA-111', 'BB-222', 'AA-111', 'DD-444'],
       'Product': ['Widget', "Widget", "Widget", 'Widget', "Gizmo", 'Gizmo', "Doodad", "Doodad"],
       'DaysToManufacture': [7, 2, 3, 1, 7, 2, 7, 1]}
df33 = pd.DataFrame(data=d33)

In [12]:
df33_

Unnamed: 0,OrderID,Product,DaysToDeliver
0,1,Widget,7
1,2,Gizmo,3
2,3,Doodad,9


In [13]:
df33

Unnamed: 0,PartID,Product,DaysToManufacture
0,AA-111,Widget,7
1,BB-222,Widget,2
2,CC-333,Widget,3
3,DD-444,Widget,1
4,AA-111,Gizmo,7
5,BB-222,Gizmo,2
6,AA-111,Doodad,7
7,DD-444,Doodad,1


In [27]:
df33grouped = df33.groupby("Product")['DaysToManufacture'].sum()

In [25]:
df33_.apply(lambda x: x['Product'], axis = 1)

0    Widget
1     Gizmo
2    Doodad
dtype: object

In [28]:
df33grouped.get(key = 'Doodad')

8

In [34]:
df33_.apply(lambda x: x if df33grouped.get(key = x['Product']) > x["DaysToDeliver"] else None, axis = 1)[["OrderID", 'Product']].dropna()

Unnamed: 0,OrderID,Product
0,1.0,Widget
1,2.0,Gizmo


### Puzzle #34

In [39]:
d34 = {'Order ID': [1, 2, 3, 4],
       'Customer ID': [1001, 1001, 2002, 3003],
       'Amount': [25, 50, 65, 50]}
df34 = pd.DataFrame(data=d34)

In [40]:
df34.head()

Unnamed: 0,Order ID,Customer ID,Amount
0,1,1001,25
1,2,1001,50
2,3,2002,65
3,4,3003,50


In [73]:
statement = (df34['Customer ID'] == 1001) & (df34['Amount'] == 50)

In [79]:
df34[~statement]

Unnamed: 0,Order ID,Customer ID,Amount
0,1,1001,25
2,3,2002,65
3,4,3003,50


### Puzzle #35

In [115]:
d35 = {'Invoice ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
       'Sales Rep ID': [1001, 2002, 3003, 5005, 7007, 1001, 2002, 3003, 6006, 8008],
       'Amount': ['$13,454', "$3,434", "$54,645", "$234,345", "$776", "$4,564", "$34,534", '$345', "$6,543", "$67"],
       "Sales Type": ['International', "International", 'International', 'International', 'International', 'Domestic', 'Domestic', 'Domestic', 'Domestic', 'Domestic']}
df35 = pd.DataFrame(data=d35)

In [116]:
df35

Unnamed: 0,Invoice ID,Sales Rep ID,Amount,Sales Type
0,1,1001,"$13,454",International
1,2,2002,"$3,434",International
2,3,3003,"$54,645",International
3,4,5005,"$234,345",International
4,5,7007,$776,International
5,6,1001,"$4,564",Domestic
6,7,2002,"$34,534",Domestic
7,8,3003,$345,Domestic
8,9,6006,"$6,543",Domestic
9,10,8008,$67,Domestic


In [124]:
counts = df35.groupby("Sales Rep ID").count()[['Sales Type']]

In [125]:
counts

Unnamed: 0_level_0,Sales Type
Sales Rep ID,Unnamed: 1_level_1
1001,2
2002,2
3003,2
5005,1
6006,1
7007,1
8008,1


In [129]:
counts[counts['Sales Type'] == 2].index

Int64Index([1001, 2002, 3003], dtype='int64', name='Sales Rep ID')

In [134]:
df35[~df35['Sales Rep ID'].isin(counts[counts['Sales Type'] == 2].index)]

Unnamed: 0,Invoice ID,Sales Rep ID,Amount,Sales Type
3,4,5005,"$234,345",International
4,5,7007,$776,International
8,9,6006,"$6,543",Domestic
9,10,8008,$67,Domestic


### Puzzle #36

In [179]:
d36 = {'Route ID': [1, 1, 2, 2, 3, 3, 4, 4],
       'Departure City': ["Austin", "Dallas", "Dallas", "Memphis", "Memphis", "Des Moines", "Dallas", "Des Moines"],
       'Arrival City': ["Dallas", "Austin", "Memphis", "Dallas", "Des Moines", "Memphis", "Des Moines", "Dallas"],
       "Cost": ["$100", "$100", "$200", "$200", "$300", "$300", "$400", "$400"]}
df36 = pd.DataFrame(data=d36)

In [180]:
df36

Unnamed: 0,Route ID,Departure City,Arrival City,Cost
0,1,Austin,Dallas,$100
1,1,Dallas,Austin,$100
2,2,Dallas,Memphis,$200
3,2,Memphis,Dallas,$200
4,3,Memphis,Des Moines,$300
5,3,Des Moines,Memphis,$300
6,4,Dallas,Des Moines,$400
7,4,Des Moines,Dallas,$400


In [203]:
def findPath(df):
    global chain
    chain = []
    firstRow = findStart(df)
    rowsChain = []
    rowsChain = findEnd(firstRow, df, rowsChain)
    return reportData(chain)

In [184]:
def findStart(df):
    for index, row in df36.iterrows():
        if row['Departure City'] == "Austin":
            return row

In [198]:
def findEnd(firstRow, df, rowsChain):
    global chain
    #print(firstRow["Route ID"])
    if firstRow["Route ID"] in [x['Route ID'] for x in rowsChain]:
        return None
    rowsChain.append(firstRow)

    town = firstRow['Arrival City']
    if town == "Des Moines":
        #print([x['Arrival City'] for x in rowsChain])
        chain.append(rowsChain)
    
    searchDF = df[df['Departure City'] == town]
    
    for index, row in df.iterrows():
        findEnd(row, df, rowsChain)

In [200]:
data = findPath(df36)

In [212]:
[[x["Cost"] for x in y] for y in data]

[['$100', '$200', '$300', '$400'], ['$100', '$200', '$300', '$400']]

In [215]:
def reportData(data):
    d = {'Route Path' : [[x['Arrival City'] for x in y] for y in data],
         'Total Cost' : [[x["Cost"] for x in y] for y in data]}
    print(d)

In [216]:
findPath(df36)

{'Route Path': [['Dallas', 'Memphis', 'Des Moines', 'Des Moines'], ['Dallas', 'Memphis', 'Des Moines', 'Des Moines']], 'Total Cost': [['$100', '$200', '$300', '$400'], ['$100', '$200', '$300', '$400']]}
