In [2]:
import pandas as pd
import numpy as np

### Question 1

Taken from https://leetcode.com/discuss/interview-question/391865/Facebook-Software-Engineer-Phone-Screen-Interview-Questions-or-REJECT

You will be supplied with two data files in CSV format . The first file contains statistics about various dinosaurs. The second file contains additional data. Given the following formula, speed = ((STRIDE_LENGTH / LEG_LENGTH) - 1) * SQRT(LEG_LENGTH * g) Where g = 9.8 m/s^2 (gravitational constant)

Write a program to read in the data files from disk, it must then print the names of only the bipedal dinosaurs from fastest to slowest. Do not print any other information.


`$ cat dataset1.csv NAME,LEG_LENGTH,DIET Hadrosaurus,1.4,herbivore Struthiomimus,0.72,omnivore Velociraptor,1.8,carnivore Triceratops,0.47,herbivore Euoplocephalus,2.6,herbivore Stegosaurus,1.50,herbivore Tyrannosaurus Rex,6.5,carnivore`

`$ cat dataset2.csv NAME,STRIDE_LENGTH,STANCE Euoplocephalus,1.97,quadrupedal Stegosaurus,1.70,quadrupedal Tyrannosaurus Rex,4.76,bipedal Hadrosaurus,1.3,bipedal Deinonychus,1.11,bipedal Struthiomimus,1.24,bipedal Velociraptorr,2.62,bipedal`

In [3]:
df = pd.DataFrame

In [4]:
d1,d2 = pd.read_csv('dataset1.csv'),pd.read_csv('dataset2.csv')

The thing to note here is that there are 7 items in each file, but 2 odd ducks and a misspelling. 
- The misspelling is Velociraptorr, which needs to be fixed before the merge.
- Triceratops is only in file 1 and Deinonychus is only in file two.

If you keep those dinos in, steps with break on the NaNs where there's not joining data.

So with 6 working dinos per file, you should only need 6 items

In [5]:
d1, d2

(                NAME  LEG_LENGTH       DIET
 0        Hadrosaurus        1.40  herbivore
 1      Struthiomimus        0.72   omnivore
 2       Velociraptor        1.80  carnivore
 3        Triceratops        0.47  herbivore
 4     Euoplocephalus        2.60  herbivore
 5        Stegosaurus        1.50  herbivore
 6  Tyrannosaurus Rex        6.50  carnivore,
                 NAME  STRIDE_LENGTH       STANCE
 0     Euoplocephalus           1.97  quadrupedal
 1        Stegosaurus           1.70  quadrupedal
 2  Tyrannosaurus Rex           4.76      bipedal
 3        Hadrosaurus           1.30      bipedal
 4        Deinonychus           1.11      bipedal
 5      Struthiomimus           1.24      bipedal
 6      Velociraptorr           2.62      bipedal)

In [6]:
# Fix the Velociraptor mispelling
d2.replace(to_replace='Velociraptorr', value='Velociraptor', inplace=True)

In [8]:
d2[d2['NAME'] == 'Velociraptor'] # fixed

Unnamed: 0,NAME,STRIDE_LENGTH,STANCE
6,Velociraptor,2.62,bipedal


In [13]:
# This method safely drops Triceratops and Deinonychus with the default inner join
df = d1.merge(d2,on='NAME').copy()

In [14]:
df

Unnamed: 0,NAME,LEG_LENGTH,DIET,STRIDE_LENGTH,STANCE
0,Hadrosaurus,1.4,herbivore,1.3,bipedal
1,Struthiomimus,0.72,omnivore,1.24,bipedal
2,Velociraptor,1.8,carnivore,2.62,bipedal
3,Euoplocephalus,2.6,herbivore,1.97,quadrupedal
4,Stegosaurus,1.5,herbivore,1.7,quadrupedal
5,Tyrannosaurus Rex,6.5,carnivore,4.76,bipedal


#### to show the difference, we can include everything with an outer join, but it leaves the dangling dinos

In [23]:
df10 = pd.merge(d1,d2,on='NAME',how='outer')
df10

Unnamed: 0,NAME,LEG_LENGTH,DIET,STRIDE_LENGTH,STANCE
0,Hadrosaurus,1.4,herbivore,1.3,bipedal
1,Struthiomimus,0.72,omnivore,1.24,bipedal
2,Velociraptor,1.8,carnivore,2.62,bipedal
3,Triceratops,0.47,herbivore,,
4,Euoplocephalus,2.6,herbivore,1.97,quadrupedal
5,Stegosaurus,1.5,herbivore,1.7,quadrupedal
6,Tyrannosaurus Rex,6.5,carnivore,4.76,bipedal
7,Deinonychus,,,1.11,bipedal


#### the formula for speed
speed = ((STRIDE_LENGTH / LEG_LENGTH) - 1) * SQRT(LEG_LENGTH * g) Where g = 9.8 m/s^2 (gravitational constant)

In [80]:
# set variable g
g=9.8
# create 'speed' feature
df['speed'] = ((df.STRIDE_LENGTH / df.LEG_LENGTH) -1 )* np.sqrt(df.LEG_LENGTH * g)

#### sort bipedals

In [81]:
df[df['STANCE'] == 'bipedal'].sort_values('speed',ascending=False)

Unnamed: 0,NAME,LEG_LENGTH,DIET,STRIDE_LENGTH,STANCE,speed
1,Struthiomimus,0.72,omnivore,1.24,bipedal,1.918448
2,Velociraptor,1.8,carnivore,2.62,bipedal,1.913333
0,Hadrosaurus,1.4,herbivore,1.3,bipedal,-0.264575
5,Tyrannosaurus Rex,6.5,carnivore,4.76,bipedal,-2.136513


#### Return only the names in order of speed

In [82]:

df[df['STANCE'] == 'bipedal'].sort_values('speed',ascending=False)['NAME']

1        Struthiomimus
2         Velociraptor
0          Hadrosaurus
5    Tyrannosaurus Rex
Name: NAME, dtype: object

### how to do this with regular python


In [26]:
import math, csv

In [51]:
dinos={}
g=9.8

In [55]:
with open('dataset2.csv','r') as f:
    rows = csv.reader(f)
    for row in rows:
        NAME, STRIDE_LENGTH, STANCE = row[0], row[1],row[2]
        if NAME == 'Velociraptorr':
            NAME = 'Velociraptor'
        if STANCE == 'bipedal':
            dinos[NAME] = float(STRIDE_LENGTH)

In [83]:
with open('dataset1.csv','r') as f:
    rows = csv.reader(f)
    for row in rows:
        NAME, LEG_LENGTH, DIET = row[0], row[1], row[2]
        if NAME in dinos:
            STRIDE_LENGTH, LEG_LENGTH =  dinos[NAME], float(LEG_LENGTH)
            dinos[NAME] = ((STRIDE_LENGTH / LEG_LENGTH) - 1) * (math.sqrt(LEG_LENGTH) * g)

In [84]:
[x[0] for x in sorted(dinos.items(), key=lambda x: x[1], reverse=True)]

['Struthiomimus',
 'Velociraptor',
 'Deinonychus',
 'Hadrosaurus',
 'Tyrannosaurus Rex']

#### As a function

In [49]:
def dinospeed(file1, file2):
    dinos = {}
    g=9.8
    with open(file2, 'r') as f:
        rows = csv.reader(f)
        for row in rows:
            NAME, STRIDE_LENGTH, STANCE = row[0], row[1],row[2]
            if NAME == 'Velociraptorr':
                NAME = 'Velociraptor'
            if STANCE == 'bipedal':
                dinos[NAME] = float(STRIDE_LENGTH)
    with open(file1, 'r') as f:
        rows = csv.reader(f)
        for row in rows:
            NAME, LEG_LENGTH, DIET = row[0], row[1],row[2]
            if NAME in dinos:
                STRIDE_LENGTH, LEG_LENGTH =  dinos[NAME], float(LEG_LENGTH)
                dinos[NAME] = ((STRIDE_LENGTH / LEG_LENGTH) - 1) * (math.sqrt(LEG_LENGTH) * g)
    return [x[0] for x in sorted(dinos.items(), key=lambda x: x[1], reverse=True)]

In [50]:
dinospeed('dataset1.csv','dataset2.csv')

['Struthiomimus',
 'Velociraptor',
 'Deinonychus',
 'Hadrosaurus',
 'Tyrannosaurus Rex']

### Question 2:
Given an array of integers greater than zero, find if it is possible to split it in two subarrays (without reordering the elements), such that the sum of the two subarrays is the same. Print the two subarrays.

I calculated rightSum initially by using sum(nums), then looped over nums, and added nums[i] to leftSum, and subtracted it from rightSum, then checked if there was a match and printed that.

In [90]:
import random

In [91]:
myarr = [random.randint(1,4096) for x in range(10)]

In [66]:
m = int(len(myarr)/2)

In [68]:
left = myarr[:m]
right = myarr[m:]

In [69]:
sum(left) == sum(right)

False

In [136]:
def splitArraysSame(arr):
    """
    This only covers the case for an even split of the elements
    """
    m = int(len(arr)/2)
    left = arr[:m]
    right = arr[m:]
    return sum(left) == sum(right)

In [103]:
# loop through - compare x + loop
for x in range(0,len(myarr3) -1):
    print(sum(myarr3[:x]) == sum(myarr[x:]))

False
False
True
False
False
False
False
False
False


#### as a function

In [94]:
def iterateArraySame(arr):
    """
    This works.
    """
    p=0
    for x in range(0,len(arr) -1):
        if sum(arr[:x]) == sum(arr[x:]):
            p+=1
    if p>0:
        return True
    else:
        return False


In [95]:
iterateArraySame(myarr)

False

#### in a lambda

In [86]:
def lambdaEqualChunk(arr):
    return set(list(map(lambda xxx: arr[:xxx] == arr[xxx:], arr)))

In [92]:
lambdaEqualChunk(myarr)

{False}

### Question 3:

There is a matrix with . and X, where X represents battleship, always of length 3. Battleship can be vertical or horizontal, never diagonal. Given a function bomb_at(i,j), returns True if battleship is present at (i,j) in the matrix. Print the head, middle, tail coordinates of the battleship.

In [61]:
# test harness
matrix = [['.','.','.','.','.'],['.','.','.','X','.'],['.','.','.','X','.'],['.','.','.','X','.'],['.','.','.','.','.']]

In [62]:
matrix

[['.', '.', '.', '.', '.'],
 ['.', '.', '.', 'X', '.'],
 ['.', '.', '.', 'X', '.'],
 ['.', '.', '.', 'X', '.'],
 ['.', '.', '.', '.', '.']]

In [64]:
import itertools

In [65]:
# test the iterator
battleship={}
for i, j in itertools.product(range(5), range(5)):
    if matrix[i][j] == 'X':
        battleship[i]=j
print(battleship)
    

{1: 3, 2: 3, 3: 3}


In [66]:
# use the notion of a bomb for a hit on a battleship
# create a test function bomb_at
def bomb_at(mat,i,j):
    if mat[i][j] =='X':
        return True
    

In [67]:
# 
size=len(matrix)
bombs=[]
for i, j in itertools.product(range(size), range(size)):
    if bomb_at(matrix,i,j):
        bombs.append((i,j))
if len(bombs) > 2 and len(bombs) < 4:
    print(bombs)
    

[(1, 3), (2, 3), (3, 3)]


In [199]:
# test harness for no bombs
matrix1= [['.','.','.','.','.'],['.','.','.','.','.'],['.','.','.','.','.'],['.','.','.','.','.'],['.','.','.','.','.']]

In [68]:
# test harness for too many bombs
matrix2= [['.','.','.','X','.'],['.','.','.','X','.'],['.','.','.','X','.'],['.','.','.','X','.'],['.','.','.','.','.']]

In [69]:
# test harness for too few bombs
matrix3= [['.','.','.','.','.'],['.','.','.','.','.'],['.','.','.','X','.'],['.','.','.','X','.'],['.','.','.','.','.']]

In [70]:
# test harness for horizontal bombs
matrix4= [['X','X','X','.','.'],['.','.','.','.','.'],['.','.','.','.','.'],['.','.','.','.','.'],['.','.','.','.','.']]

In [71]:
# test harness for diagonal bombs - this should break
matrix5= [['X','.','.','.','.'],['.','X','.','.','.'],['.','.','X','.','.'],['.','.','.','.','.'],['.','.','.','.','.']]

In [72]:
mat = matrix5
size=len(mat)
bombs=[]
for i, j in itertools.product(range(size), range(size)):
    if bomb_at(mat,i,j):
        bombs.append((i,j))
if len(bombs) > 2 and len(bombs) < 4:
    print(bombs)
else:
    print("no bombs or wrong number of bombs")

[(0, 0), (1, 1), (2, 2)]


In [338]:
# extend to show no or some bombs
# this breaks (shows) diagonals
mat = matrix
size=len(mat)
bombs=[]
for i, j in itertools.product(range(size), range(size)):
    if bomb_at(mat,i,j):
        bombs.append((i,j))
if len(bombs) > 2 and len(bombs) < 4:
    print(bombs)
elif len(bombs) > 0 and len(bombs) < 3:
    print('not enough bombs to matter')
elif len(bombs) > 3:
    print('too many bombs to worry about')
else:
    print("no bombs")

[(1, 3), (2, 3), (3, 3)]


In [73]:
# test harness
vert = [(1, 3), (2, 3), (3, 3)]
horz = [(0, 0), (0, 1), (0, 2)]
diag = [(0, 0), (1, 1), (2, 1)]

In [74]:
def notdiag(arr):
    """
    if vertical or horizontal, a set of either x or y should == 1.
    if diag both sets are > 1
    """
    return len(set([x[0] for x in arr])) == 1 or  len(set([x[1] for x in arr])) == 1

In [75]:
# fix to not return diagonals
mat = matrix
size=len(mat)
bombs=[]
for i, j in itertools.product(range(size), range(size)):
    if bomb_at(mat,i,j):
        bombs.append((i,j))
if len(bombs) > 2 and len(bombs) < 4 and notdiag(bombs):
    print(bombs)
elif len(bombs) > 0 and len(bombs) < 3:
    print('not enough bombs to matter')
elif len(bombs) > 3:
    print('too many bombs to worry about')
else:
    print("no vertical or horizontal bombs")

[(1, 3), (2, 3), (3, 3)]


In [76]:
bombs

[(1, 3), (2, 3), (3, 3)]

## Problem 4
Take the filenames like anakin_2010-02-18.csv and create a directory structure like so:

2010/02/18/starwars/

then move the file into it.

There are two classes - starwars and futurama

In [358]:
cd homework

/Users/person/Coding/Python/facebook_prep_and_notes/homework


In [375]:
import os, sys
from shutil import copyfile

In [400]:
ls| tail -10

yoda_2011-12-18.csv
yoda_2012-01-24.csv
yoda_2012-02-02.csv
yoda_2012-02-08.csv
yoda_2012-08-08.csv
yoda_2012-09-04.csv
yoda_2012-09-26.csv
yoda_2012-11-18.csv
yoda_2013-01-18.csv
yoda_2013-01-19.csv


In [363]:
futurama = ['hermes','nibbler','farnsworth','bender','leela']
starwars = ['jarjar','yoda','anakin','han','obiwan']

In [365]:
files = os.listdir()
cwd = os.getcwd()

In [414]:
charnames = set([x.split('_')[0] for x in files])

In [415]:
charnames

{'anakin',
 'bender',
 'farnsworth',
 'han',
 'hermes',
 'jarjar',
 'leela',
 'nibbler',
 'obiwan',
 'yoda'}

In [410]:
def propname(name):
    if name in starwars:
        return 'starwars'
    else:
        return 'futurama'

In [411]:
propname('leela')

'futurama'

In [416]:
for f in files:
    chunks = f.split("_")
    dates = chunks[1].split("-")
    title = propname(chunks[0])
    dirstr = '{}/{}/{}/{}/{}/'.format(cwd,dates[0],dates[1],dates[2].strip('.csv'),title)
    os.makedirs(dirstr, exist_ok=True)
    copyfile(f,dirstr + f)

In [77]:
ls

[1m[36m2009[m[m/
[1m[36m2010[m[m/
[1m[36m2011[m[m/
[1m[36m2014[m[m/
README.md
dataset1.csv
dataset2.csv
dino_awk.sh
dinos summarized solution in pandas.ipynb
dinos workbook.ipynb
dinosaurs1.csv
dinosaurs2.csv
facebook_interview_problems.ipynb
fb_problem
[1m[36mhomework[m[m/
homework.tar.gz
moby_dick.txt
speed_calc.py
