Skip to content
Open

done #10

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions OfficeSupplies.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
OrderDate,Region,Rep,Item,Units,Unit Price
4-Jul-2014,East,Richard,Pen Set,62,4.99
12-Jul-2014,East,Nick,Binder,29,1.99
21-Jul-2014,Central,Morgan,Pen Set,55,12.49
29-Jul-2014,East,Susan,Binder,81,19.99
7-Aug-2014,Central,Matthew,Pen Set,42,23.95
15-Aug-2014,East,Richard,Pencil,35,4.99
24-Aug-2014,West,James,Desk,3,275
1-Sep-2014,Central,Smith,Desk,2,125
10-Sep-2014,Central,Bill,Pencil,7,1.29
18-Sep-2014,East,Richard,Pen Set,16,15.99
27-Sep-2014,West,James,Pen,76,1.99
5-Oct-2014,Central,Morgan,Binder,28,8.99
14-Oct-2014,West,Thomas,Binder,57,19.99
22-Oct-2014,East,Richard,Pen,64,8.99
31-Oct-2014,Central,Rachel,Pencil,14,1.29
8-Nov-2014,East,Susan,Pen,15,19.99
17-Nov-2014,Central,Alex,Binder,11,4.99
25-Nov-2014,Central,Matthew,Pen Set,96,4.99
4-Dec-2014,Central,Alex,Binder,94,19.99
12-Dec-2014,Central,Smith,Pencil,67,1.29
21-Dec-2014,Central,Rachel,Binder,28,4.99
29-Dec-2014,East,Susan,Pen Set,74,15.99
6-Jan-2015,East,Richard,Pencil,95,1.99
15-Jan-2015,Central,Bill,Binder,46,8.99
23-Jan-2015,Central,Matthew,Binder,50,19.99
1-Feb-2015,Central,Smith,Binder,87,15
9-Feb-2015,Central,Alex,Pencil,36,4.99
18-Feb-2015,East,Richard,Binder,4,4.99
26-Feb-2015,Central,Bill,Pen,27,19.99
7-Mar-2015,West,James,Binder,7,19.99
15-Mar-2015,West,James,Pencil,56,2.99
24-Mar-2015,Central,Alex,Pen Set,50,4.99
1-Apr-2015,East,Richard,Binder,60,4.99
10-Apr-2015,Central,Rachel,Pencil,66,1.99
18-Apr-2015,Central,Rachel,Pencil,75,1.99
27-Apr-2015,East,Nick,Pen,96,4.99
5-May-2015,Central,Alex,Pencil,90,4.99
14-May-2015,Central,Bill,Pencil,53,1.29
22-May-2015,West,Thomas,Pencil,32,1.99
31-May-2015,Central,Bill,Binder,80,8.99
8-Jun-2015,East,Richard,Binder,60,8.99
17-Jun-2015,Central,Matthew,Desk,5,125
25-Jun-2015,Central,Morgan,Pencil,90,4.99
Binary file added __pycache__/exercise.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/morestats.cpython-36.pyc
Binary file not shown.
Binary file added __pycache__/supplies_analysis.cpython-36.pyc
Binary file not shown.
1,016 changes: 1,016 additions & 0 deletions baseball.csv

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions baseball_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pandas as pd
import morestats as m

df = pd.read_csv('baseball.csv')

# Averages of height, weight and age over all players, using morestats.
avg_height = m.mean(df.Height)
avg_weight = m.mean(df.Weight)
avg_age = m.mean(df.Age)

# Per-team mean of the numeric columns. numeric_only=True keeps any text
# columns out of the aggregation; pandas 2.0+ raises TypeError on them
# instead of silently dropping them.
# NOTE(review): assumes baseball.csv has non-numeric columns besides Team
# (e.g. player names) -- confirm against the file.
teams = df.groupby(['Team']).mean(numeric_only=True)

# Aggregate stats for Arizona.
arizona = teams.loc['ARZ']

# Team with the highest average Height (index label of the column maximum).
tallest_team = teams['Height'].idxmax()

# Subset of the team averages: rows 'BAL' through 'CLE', columns 'Height'
# through 'Weight' (label slices include both ends). The result is not
# stored, so it is only visible in an interactive session.
teams.loc['BAL':'CLE', 'Height':'Weight']
17 changes: 14 additions & 3 deletions exercise.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
def sort_by_last_name(people, order):
    """Return full names sorted by last name.

    The last whitespace-separated word of each name is the sort key, so
    names with middle names sort by their real last name and single-word
    names do not raise.

    argument: people -- iterable of full-name strings
    argument: order -- False for ascending, True for descending
    return: a new list containing the names in sorted order

    >>> team = ['Bob Smith', 'Ken Jones', 'Alex Bradino']
    >>> sort_by_last_name(team, False)
    ['Alex Bradino', 'Ken Jones', 'Bob Smith']
    >>> sort_by_last_name(team, True)
    ['Bob Smith', 'Ken Jones', 'Alex Bradino']
    >>> sort_by_last_name(['Mary Ann Smith', 'Al Jones'], False)
    ['Al Jones', 'Mary Ann Smith']
    """
    # Slice with [-1:] instead of indexing with [-1] so a blank name yields
    # an empty key (sorting first) rather than raising IndexError.
    return sorted(people, key=lambda name: name.split()[-1:], reverse=order)



# problem 2
Expand All @@ -15,10 +22,14 @@ def sort_by_last_name(people, order):
ages = [4, 9, 12]


def create_dictionary_from_lists(names, ages):
    """Pair each name with the age at the same position.

    argument: names -- iterable of keys
    argument: ages -- iterable of values, in the same order as names
    return: dict mapping each name to its age; if the inputs differ in
            length, the extra items of the longer one are dropped because
            zip stops at the shorter input

    The doctest compares against a dict instead of a printed repr, so it
    does not depend on the order in which keys are displayed.

    >>> result = create_dictionary_from_lists(['James', 'Susan', 'Maggie'], [4, 9, 12])
    >>> result == {'James': 4, 'Susan': 9, 'Maggie': 12}
    True
    """
    return dict(zip(names, ages))


# problem 3
Expand Down
65 changes: 65 additions & 0 deletions morestats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# How to run doctest:
# python -m doctest -v morestats.py


def add(num1, num2):
    """Return num1 + num2."""
    total = num1 + num2
    return total


def volume(length, width, height):
    """Return the product length * width * height."""
    base_area = length * width
    return base_area * height


def mean(numbers):
    """Compute the arithmetic mean of a collection of numbers.

    argument: sized iterable of numbers
    return: the sum of the values divided by how many there are
    """
    total = sum(numbers)
    count = len(numbers)
    return total / count

# 39127
# sort -> 12379
# middle -> 3

# 391274
# sort -> 123479
# avg of middle -> (3+4)/2 -> 3.5


def median(numbers):
    """Computes the median of a list of numbers.

    argument: non-empty list of numbers (the input is not modified)
    return: the middle value of the sorted data, or the average of the two
            middle values when the count is even
    raises: ValueError if numbers is empty

    >>> median([2,1,6])
    2
    >>> median([3,5,4,9])
    4.5
    >>> median([])
    Traceback (most recent call last):
        ...
    ValueError: median() requires at least one number

    """
    ordered = sorted(numbers)
    if not ordered:
        # Without this guard the even-length branch would average an empty
        # slice and quietly return 0.0.
        raise ValueError("median() requires at least one number")
    middle = len(ordered) // 2  # use '//' for integer division
    if len(ordered) % 2:
        # odd list: a single middle element
        return ordered[middle]
    # even list: average the two elements around the midpoint
    return (ordered[middle - 1] + ordered[middle]) / 2

from collections import defaultdict
def mode(numbers):
    """Return the value that occurs most often in the list.

    argument: list of numbers
    return: the mode; when several values share the highest count, the one
            whose first occurrence comes latest in the list is returned

    >>> mode([1,2,2,2,3,3,4])
    2
    """
    counts = defaultdict(int)
    for value in numbers:
        counts[value] = counts[value] + 1
    # Stable sort by count: values with equal counts keep first-seen order,
    # so the final entry is the most frequent value.
    ranked = sorted(counts.items(), key=lambda pair: pair[1])
    winner, _ = ranked[-1]
    return winner

def variance(number, ddof):
    """Compute the variance of a collection of numbers.

    argument: number -- sized iterable of numbers
    argument: ddof -- amount subtracted from the count to form the divisor
              (0 divides by n, 1 divides by n - 1)
    return: the sum of squared deviations from the mean, divided by
            len(number) - ddof
    """
    # The mean is the same for every element, so compute it once instead of
    # once per element.
    center = mean(number)
    squared_deviations = sum((num - center) ** 2 for num in number)
    return squared_deviations / (len(number) - ddof)

def stdev(numbers, ddof):
    """Compute the standard deviation of a collection of numbers.

    argument: numbers -- sized iterable of numbers
    argument: ddof -- passed through to variance(), where the divisor is
              len(numbers) - ddof
    return: the square root of variance(numbers, ddof)
    """
    # Pass the `numbers` parameter; there is no `number` name in this scope.
    return variance(numbers, ddof) ** .5
24 changes: 24 additions & 0 deletions supplies_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import pandas as pd

df = pd.read_csv('OfficeSupplies.csv')

# Add a new column called Total Price = Units * Unit Price
df['Total Price'] = df['Units'] * df['Unit Price']

# Mean and sum of Total Price for each rep within each region.
# The result is not stored, so it is only visible in an interactive session.
df.groupby(['Region', 'Rep'])['Total Price'].agg(['mean', 'sum'])

# Total Price summed by region, as a frame with columns Region and sum
regions = df.groupby(['Region'])['Total Price'].agg(['sum']).reset_index()

# Distinct reps for each region, as a frame with columns Region and Rep
reps = df.groupby(['Region'])['Rep'].unique().to_frame().reset_index()

# Use merge to join the two frames on Region, then index the result by Region
merged = reps.merge(regions, on='Region').set_index('Region')

# Create a new column containing the count of reps per region: the length
# of each region's array of distinct reps
merged['count'] = merged['Rep'].map(len)

# Create a new column with the region total divided by its rep count
merged['normalized'] = merged['sum'] / merged['count']