Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions Supplies.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
OrderDate,Region,Rep,Item,Units,Unit Price
4-Jul-14,East,Richard,Pen Set,62,4.99
12-Jul-14,East,Nick,Binder,29,1.99
21-Jul-14,Central,Morgan,Pen Set,55,12.49
29-Jul-14,East,Susan,Binder,81,19.99
7-Aug-14,Central,Matthew,Pen Set,42,23.95
15-Aug-14,East,Richard,Pencil,35,4.99
24-Aug-14,West,James,Desk,3,275
1-Sep-14,Central,Smith,Desk,2,125
10-Sep-14,Central,Bill,Pencil,7,1.29
18-Sep-14,East,Richard,Pen Set,16,15.99
27-Sep-14,West,James,Pen,76,1.99
5-Oct-14,Central,Morgan,Binder,28,8.99
14-Oct-14,West,Thomas,Binder,57,19.99
22-Oct-14,East,Richard,Pen,64,8.99
31-Oct-14,Central,Rachel,Pencil,14,1.29
8-Nov-14,East,Susan,Pen,15,19.99
17-Nov-14,Central,Alex,Binder,11,4.99
25-Nov-14,Central,Matthew,Pen Set,96,4.99
4-Dec-14,Central,Alex,Binder,94,19.99
12-Dec-14,Central,Smith,Pencil,67,1.29
21-Dec-14,Central,Rachel,Binder,28,4.99
29-Dec-14,East,Susan,Pen Set,74,15.99
6-Jan-15,East,Richard,Pencil,95,1.99
15-Jan-15,Central,Bill,Binder,46,8.99
23-Jan-15,Central,Matthew,Binder,50,19.99
1-Feb-15,Central,Smith,Binder,87,15
9-Feb-15,Central,Alex,Pencil,36,4.99
18-Feb-15,East,Richard,Binder,4,4.99
26-Feb-15,Central,Bill,Pen,27,19.99
7-Mar-15,West,James,Binder,7,19.99
15-Mar-15,West,James,Pencil,56,2.99
24-Mar-15,Central,Alex,Pen Set,50,4.99
1-Apr-15,East,Richard,Binder,60,4.99
10-Apr-15,Central,Rachel,Pencil,66,1.99
18-Apr-15,Central,Rachel,Pencil,75,1.99
27-Apr-15,East,Nick,Pen,96,4.99
5-May-15,Central,Alex,Pencil,90,4.99
14-May-15,Central,Bill,Pencil,53,1.29
22-May-15,West,Thomas,Pencil,32,1.99
31-May-15,Central,Bill,Binder,80,8.99
8-Jun-15,East,Richard,Binder,60,8.99
17-Jun-15,Central,Matthew,Desk,5,125
25-Jun-15,Central,Morgan,Pencil,90,4.99
Binary file added __pycache__/exercise.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/morestats.cpython-36.pyc
Binary file not shown.
1,016 changes: 1,016 additions & 0 deletions baseball.csv

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions baseball_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""Summarise player height, weight and age from baseball.csv."""
import pandas as pd

import morestats as m

# NOTE(review): baseball.csv is not rendered in this view -- the script
# assumes it has Team, Height, Weight and Age columns; confirm.
df = pd.read_csv('baseball.csv')

# Average height, weight and age across all players, using morestats.
avg_height = m.mean(df.Height)
avg_weight = m.mean(df.Weight)
avg_age = m.mean(df.Age)

# Group by team name and average every numeric column.
# numeric_only=True keeps text columns out of the mean; current pandas
# raises on them instead of silently dropping them.
teams = df.groupby('Team').mean(numeric_only=True)

# Aggregate stats for Arizona. The team code is an index label, so it has to
# be a string (a bare ARZ is an undefined name).
arizona = teams.loc['ARZ']

# Team with the greatest average height.
greatest = teams['Height'].idxmax()
37 changes: 30 additions & 7 deletions exercise.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,55 @@
"""
Practice exercises.

Run the doctests with:

    python -m doctest -v exercise.py
"""

# problem 1
# ------------------------------------------------------------------- #
people = ['Bob Smith', 'Ken Jones', 'Alex Bradino']


def sort_by_last_name(people, order=False):
    """
    Return the full names sorted alphabetically by last name.

    The last whitespace-separated word of each name is the sort key. The
    input is not modified; a new list is returned.

    people: iterable of full-name strings
    order:  False (the default) for ascending, True for descending; it is
            passed straight through to sorted() as ``reverse``

    >>> sort_by_last_name(['Bob Smith', 'Ken Jones', 'Alex Bradino'], False)
    ['Alex Bradino', 'Ken Jones', 'Bob Smith']
    >>> sort_by_last_name(['Bob Smith', 'Ken Jones', 'Alex Bradino'], True)
    ['Bob Smith', 'Ken Jones', 'Alex Bradino']
    >>> sort_by_last_name(['Bob Smith', 'Ken Jones', 'Alex Bradino'])
    ['Alex Bradino', 'Ken Jones', 'Bob Smith']
    >>> sort_by_last_name(['Zed', '', 'Amy Adams'])
    ['', 'Amy Adams', 'Zed']
    """
    # Slicing with [-1:] rather than indexing with [-1] keeps a blank name
    # from raising IndexError: its key is the empty list, which sorts first.
    return sorted(people, key=lambda person: person.split()[-1:], reverse=order)


# problem 2
# ------------------------------------------------------------------- #
names = ['James', 'Susan', 'Maggie']
ages = [4, 9, 12]


def create_dictionary_from_lists(names, ages):
    """
    Build a dictionary mapping each name to the age at the same position.

    names: sequence of keys
    ages:  sequence of values; it must be at least as long as names
           (surplus ages are ignored)

    Raises IndexError when there are fewer ages than names.

    Dictionaries print in insertion order, so the result is shown in the
    order of names rather than alphabetically.

    >>> create_dictionary_from_lists(['James', 'Susan', 'Maggie'], [4, 9, 12])
    {'James': 4, 'Susan': 9, 'Maggie': 12}
    >>> create_dictionary_from_lists([], [])
    {}
    """
    if len(ages) < len(names):
        # zip() would silently drop the unmatched names; keep failing loudly
        # with the same exception type an out-of-range index produces.
        raise IndexError('fewer ages than names')
    return dict(zip(names, ages))

# problem 3
# ------------------------------------------------------------------- #
numbers = [5, 6, 7, 8, 9, 10, 11, 12]


def square_even_values_and_sum_under_10(numbers):
    """
    Sum the squares of the even values in numbers that are below 10.

    For the sample list only 6 and 8 qualify: 6**2 + 8**2 == 100.

    >>> square_even_values_and_sum_under_10([5, 6, 7, 8, 9, 10, 11, 12])
    100
    """
    total = 0
    for value in numbers:
        if value < 10 and value % 2 == 0:
            total += value ** 2
    return total
78 changes: 78 additions & 0 deletions morestats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Small statistics helpers.

Run the doctests with:

    python -m doctest -v morestats.py
"""
# NOTE(review): this print and the one after add() run every time the module
# is imported, not only when it is executed directly.
print('Hello World!')


def add(num1, num2):
    """
    Return num1 + num2.

    >>> add(2, 3)
    5
    """
    return num1 + num2


print(add(2, 3))


# Volume of a rectangular box (length x width x height).
def vrectangle(length, width, height):
    """
    Return the product of length, width and height.

    >>> vrectangle(2, 3, 4)
    24
    """
    base_area = length * width
    return base_area * height

# Arithmetic mean of a collection of numbers.
def mean(numbers):
    """
    Return the sum of numbers divided by how many there are.

    numbers must work with both sum() and len(). An empty collection raises
    ZeroDivisionError.

    >>> mean([1, 2, 3, 4])
    2.5
    """
    total = sum(numbers)
    count = len(numbers)
    return total / count

# Median of a collection of numbers.
def median(numbers):
    """
    Compute the median of a list of numbers.

    argument: non-empty iterable of numbers (it is not modified)
    return:   the middle value of the sorted data when the count is odd, or
              the average of the two middle values when it is even
    raises:   ValueError if numbers is empty

    >>> median([2,1,6])
    2
    >>> median([3,5,4,9])
    4.5
    >>> median([])
    Traceback (most recent call last):
        ...
    ValueError: median() requires at least one number
    """
    ordered = sorted(numbers)
    if not ordered:
        # Without this guard the even branch averages an empty slice and
        # reports a made-up 0.0.
        raise ValueError('median() requires at least one number')
    middle, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[middle]
    return (ordered[middle - 1] + ordered[middle]) / 2

from collections import defaultdict
def mode(numbers):
    """
    Find the most frequent value in numbers.

    When several values share the highest count, the one whose first
    occurrence comes latest is returned (this relies on the dictionary
    keeping insertion order). Empty input raises IndexError.

    >>> mode([1,1,1,1,1,1,4,5,6])
    1
    >>> mode([1,2,2,2,3,3,4])
    2
    """
    tally = defaultdict(int)
    for value in numbers:
        tally[value] += 1
    # sorted() is stable, so tied values stay in first-seen order and the
    # last of them ends up at the end of the ranking.
    by_frequency = sorted(tally.items(), key=lambda pair: pair[1])
    return by_frequency[-1][0]

# Variance tells us about the spread of the data.
# The square root of the variance is the standard deviation.
# One standard deviation is also called 1 sigma.

# Sample data used by the doctests below.
numbers = [1, 2, 3, 4, 5, 6, 7]


def variance(numbers, ddof):
    """
    Determine the variance of a set of numbers.

    numbers: collection of numbers; it is iterated and also passed to
             mean() and len()
    ddof:    value subtracted from the count in the divisor
             (0 for a whole population, 1 for a sample)

    Raises ZeroDivisionError when numbers is empty or when
    len(numbers) - ddof is 0.

    >>> variance(numbers, 0)
    4.0
    >>> variance(numbers, 1)
    4.666666666666667
    """
    # Work out the mean once; calling mean() for every element made this
    # quadratic in the length of numbers.
    center = mean(numbers)
    return sum((num - center) ** 2 for num in numbers) / (len(numbers) - ddof)


def stdev(numbers, ddof):
    """
    Find the standard deviation for a population or a sample.

    This is the square root of variance(numbers, ddof): pass ddof=0 for a
    population and ddof=1 for a sample.

    >>> stdev([1, 2, 3, 4, 5, 6, 7], 0)
    2.0
    >>> round(stdev([1, 2, 3, 4, 5, 6, 7], 1), 6)
    2.160247
    """
    return variance(numbers, ddof) ** 0.5
25 changes: 25 additions & 0 deletions supplies_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Summarise order totals per sales rep and per region from Supplies.csv."""
import pandas as pd

# The data file is committed as 'Supplies.csv'; the name has to match exactly
# on case-sensitive file systems.
df = pd.read_csv('Supplies.csv')

# Add a new column: Total = Units * Unit Price.
df['Total'] = df['Units'] * df['Unit Price']

# Mean, sum and order count of Total for each rep within each region.
rep_stats = df.groupby(['Region', 'Rep'])['Total'].agg(['mean', 'sum', 'count'])

# The three (region, rep) pairs with the largest mean order total.
largestthree = rep_stats.nlargest(3, 'mean')

# Total sales per region, and the distinct reps appearing in each region.
regions = df.groupby(['Region'])['Total'].agg(['sum'])
reps = df.groupby(['Region'])['Rep'].unique()

# Convert the series into a dataframe so both results can be merged on Region.
rps = reps.to_frame()
reps = rps.reset_index()
regions = regions.reset_index()
merged = pd.merge(reps, regions, on='Region', how='inner').set_index('Region')

# Normalise each region's total by its number of distinct reps.
merged['count'] = merged['Rep'].apply(len)
merged['normalized'] = merged['sum'] / merged['count']