python-data-science · scardinamauro · Jan 9, 2018 · Jan 9, 2018
diff --git a/OfficeSupplies.csv b/OfficeSupplies.csv
@@ -0,0 +1,44 @@
+OrderDate,Region,Rep,Item,Units,Unit Price
+4-Jul-2014,East,Richard,Pen Set,62,4.99
+12-Jul-2014,East,Nick,Binder,29,1.99
+21-Jul-2014,Central,Morgan,Pen Set,55,12.49
+29-Jul-2014,East,Susan,Binder,81,19.99
+7-Aug-2014,Central,Matthew,Pen Set,42,23.95
+15-Aug-2014,East,Richard,Pencil,35,4.99
+24-Aug-2014,West,James,Desk,3,275
+1-Sep-2014,Central,Smith,Desk,2,125
+10-Sep-2014,Central,Bill,Pencil,7,1.29
+18-Sep-2014,East,Richard,Pen Set,16,15.99
+27-Sep-2014,West,James,Pen,76,1.99
+5-Oct-2014,Central,Morgan,Binder,28,8.99
+14-Oct-2014,West,Thomas,Binder,57,19.99
+22-Oct-2014,East,Richard,Pen,64,8.99
+31-Oct-2014,Central,Rachel,Pencil,14,1.29
+8-Nov-2014,East,Susan,Pen,15,19.99
+17-Nov-2014,Central,Alex,Binder,11,4.99
+25-Nov-2014,Central,Matthew,Pen Set,96,4.99
+4-Dec-2014,Central,Alex,Binder,94,19.99
+12-Dec-2014,Central,Smith,Pencil,67,1.29
+21-Dec-2014,Central,Rachel,Binder,28,4.99
+29-Dec-2014,East,Susan,Pen Set,74,15.99
+6-Jan-2015,East,Richard,Pencil,95,1.99
+15-Jan-2015,Central,Bill,Binder,46,8.99
+23-Jan-2015,Central,Matthew,Binder,50,19.99
+1-Feb-2015,Central,Smith,Binder,87,15
+9-Feb-2015,Central,Alex,Pencil,36,4.99
+18-Feb-2015,East,Richard,Binder,4,4.99
+26-Feb-2015,Central,Bill,Pen,27,19.99
+7-Mar-2015,West,James,Binder,7,19.99
+15-Mar-2015,West,James,Pencil,56,2.99
+24-Mar-2015,Central,Alex,Pen Set,50,4.99
+1-Apr-2015,East,Richard,Binder,60,4.99
+10-Apr-2015,Central,Rachel,Pencil,66,1.99
+18-Apr-2015,Central,Rachel,Pencil,75,1.99
+27-Apr-2015,East,Nick,Pen,96,4.99
+5-May-2015,Central,Alex,Pencil,90,4.99
+14-May-2015,Central,Bill,Pencil,53,1.29
+22-May-2015,West,Thomas,Pencil,32,1.99
+31-May-2015,Central,Bill,Binder,80,8.99
+8-Jun-2015,East,Richard,Binder,60,8.99
+17-Jun-2015,Central,Matthew,Desk,5,125
+25-Jun-2015,Central,Morgan,Pencil,90,4.99
diff --git a/__pycache__/exercise.cpython-36.pyc b/__pycache__/exercise.cpython-36.pyc
diff --git a/__pycache__/morestats.cpython-36.pyc b/__pycache__/morestats.cpython-36.pyc
diff --git a/__pycache__/supplies_analysis.cpython-36.pyc b/__pycache__/supplies_analysis.cpython-36.pyc
diff --git a/baseball.csv b/baseball.csv
diff --git a/baseball_analysis.py b/baseball_analysis.py
@@ -0,0 +1,22 @@
+import pandas as pd
+import morestats as m
+
+df = pd.read_csv('baseball.csv')
+
+# Find avg height, weight, age for all players using morestats
+# (m.mean only needs sum() and len(), which a pandas Series supports).
+avg_height = m.mean(df.Height)
+avg_weight = m.mean(df.Weight)
+avg_age = m.mean(df.Age)
+
+# Group by team name and show mean height, weight, age.
+# numeric_only=True keeps the aggregation to numeric columns; without it,
+# recent pandas raises a TypeError if the frame has text columns
+# (NOTE(review): baseball.csv is not visible here - confirm its columns).
+teams = df.groupby('Team').mean(numeric_only=True)
+
+# Find aggregate stats for Arizona
+arizona = teams.loc['ARZ']
+
+# Find team with highest avg Height (index label of the max in that column)
+tallest_team = teams['Height'].idxmax()
+
+# Find a subset of the data: label slices are inclusive on both ends,
+# so this keeps teams 'BAL'..'CLE' and columns 'Height'..'Weight'.
+subset = teams.loc['BAL':'CLE', 'Height':'Weight']
diff --git a/exercise.py b/exercise.py
@@ -6,7 +6,14 @@
 def sort_by_last_name(people, order):
     # return full names sorted by last name in either ascending or descending order
     # add doctests make sure it passes
-    pass
+    '''Return the full names in people sorted by last name.
+
+    The sort key is the last whitespace-separated word of each name, so
+    names with a middle name still sort by surname and single-word names
+    sort by that word. order is passed to sorted() as reverse:
+    False gives ascending order, True gives descending order.
+
+    >>> sort_by_last_name(people, False)
+    ['Alex Bradino', 'Ken Jones', 'Bob Smith']
+    >>> sort_by_last_name(people, True)
+    ['Bob Smith', 'Ken Jones', 'Alex Bradino']
+    >>> sort_by_last_name(['Ann Z. Able', 'Bo Young'], False)
+    ['Ann Z. Able', 'Bo Young']
+    '''
+    return sorted(people, key=lambda full_name: full_name.split()[-1], reverse=order)
+
 
 
 # problem 2
@@ -15,10 +22,14 @@ def sort_by_last_name(people, order):
 ages = [4, 9, 12]
 
 
-def create_dictionary_from_lists(names, ages):
+def create_dictionary_from_lists(names,ages):
     # {'James':4, 'Susan':9, 'Maggie':12}
     # add doctests make sure it passes
-    pass
+    '''Pair each name with the age at the same position and return a dict.
+
+    zip() stops at the shorter list, so unmatched trailing items are dropped.
+    The doctest compares with == instead of printing the dict, because the
+    printed key order differs between Python versions.
+
+    >>> create_dictionary_from_lists(['James', 'Susan', 'Maggie'], [4, 9, 12]) == {'James': 4, 'Maggie': 12, 'Susan': 9}
+    True
+    '''
+    return dict(zip(names, ages))
 
 
 # problem 3

diff --git a/morestats.py b/morestats.py
@@ -0,0 +1,65 @@
+#    How to run doctest:
+#    python -m doctest -v morestats.py
+
+
+def add(num1, num2):
+    """Return the result of num1 + num2."""
+    total = num1 + num2
+    return total
+
+
+def volume(length, width, height):
+    """Return the product length * width * height."""
+    base_area = length * width
+    return base_area * height
+
+
+def mean(numbers):
+    """Return the arithmetic mean: sum(numbers) divided by len(numbers).
+
+    An empty input raises ZeroDivisionError.
+    """
+    total = sum(numbers)
+    count = len(numbers)
+    return total / count
+
+# 39127
+# sort -> 12379
+# middle -> 3
+
+# 391274
+# sort -> 123479
+# avg of middle -> (3+4)/2 -> 3.5
+
+
+def median(numbers):
+    """Computes the median of a list of numbers.
+
+    argument: list of numbers (any order; the input is not modified)
+    return: the middle value for an odd count, or the average of the
+            two middle values for an even count
+    raises: ValueError if numbers is empty
+
+    >>> median([2,1,6])
+    2
+    >>> median([3,5,4,9])
+    4.5
+    >>> median([])
+    Traceback (most recent call last):
+        ...
+    ValueError: median() requires at least one number
+
+    """
+    ordered = sorted(numbers)
+    if not ordered:
+        # Without this guard the even branch averages an empty slice
+        # and silently reports 0.0 as the median.
+        raise ValueError('median() requires at least one number')
+    middle = len(ordered) // 2  # use '//' for integer division
+    if len(ordered) % 2 == 0:
+        # even count: average the two values either side of the midpoint
+        return (ordered[middle - 1] + ordered[middle]) / 2
+    # odd count: the midpoint element is the median
+    return ordered[middle]
+
+from collections import defaultdict
+def mode(numbers):
+    """Return the value that occurs most often in the list
+
+    argument: list of numbers
+    return: the mode
+
+    >>> mode([1,2,2,2,3,3,4])
+    2
+    """
+    counts = {}
+    for value in numbers:
+        counts[value] = counts.get(value, 0) + 1
+    # Ascending sort by occurrence count leaves the most frequent value last.
+    by_frequency = sorted(counts, key=counts.get)
+    return by_frequency[-1]
+
+def variance(number, ddof):
+    """Return the variance of number with len(number) - ddof as the divisor.
+
+    argument: number - list of numbers
+              ddof - delta degrees of freedom subtracted from the count
+                     (0 gives the population variance, 1 the sample variance)
+    return: sum of squared deviations from the mean / (len(number) - ddof)
+    raises: ZeroDivisionError if number is empty or len(number) == ddof
+    """
+    # Compute the mean once up front; evaluating mean() for every element
+    # would make the whole calculation quadratic in len(number).
+    centre = mean(number)
+    squared_deviations = sum((num - centre) ** 2 for num in number)
+    return squared_deviations / (len(number) - ddof)
+
+def stdev(numbers, ddof):
+    """Return the standard deviation: the square root of variance(numbers, ddof).
+
+    argument: numbers - list of numbers
+              ddof - delta degrees of freedom, forwarded to variance()
+    return: variance(numbers, ddof) ** .5
+    """
+    # Forward the 'numbers' parameter itself; no other name is in scope here.
+    return variance(numbers, ddof) ** .5
diff --git a/supplies_analysis.py b/supplies_analysis.py
@@ -0,0 +1,24 @@
+import pandas as pd
+
+df = pd.read_csv('OfficeSupplies.csv')
+
+# Add a new column called Total Price = Units * Unit Price
+df['Total Price'] = df['Units'] * df['Unit Price']
+
+# Show the mean and the sum for each rep per region
+rep_totals = df.groupby(['Region', 'Rep'])['Total Price'].agg(['mean', 'sum'])
+
+# Show totals by region (one 'sum' column, Region restored as a column)
+regions = df.groupby(['Region'])['Total Price'].agg(['sum']).reset_index()
+
+# Show reps per region (each cell holds the array of unique rep names)
+reps = df.groupby(['Region'])['Rep'].unique().to_frame().reset_index()
+
+# Use 'merge' to join the two frames on their shared Region column
+merged = reps.merge(regions, on='Region').set_index('Region')
+
+# Create a new column containing the count of reps per region.
+# Mapping len over the single 'Rep' column avoids a row-wise apply.
+merged['count'] = merged['Rep'].map(len)
+
+# Create a new column for normalized: region total divided by its rep count
+merged['normalized'] = merged['sum'] / merged['count']