In [1]:
from pandas import DataFrame, Series
import numpy as np

#################
# Syntax Reminder:
#
# The following code would create a two-column pandas DataFrame
# named df with columns labeled 'name' and 'age':
#
# people = ['Sarah', 'Mike', 'Chrisna']
# ages  =  [28, 32, 25]
# df = DataFrame({'name' : Series(people),
#                 'age'  : Series(ages)})

def create_dataframe():
    '''
    Build and return the 2014 Sochi winter olympics medal table as a
    pandas dataframe.

    The dataframe has one row per country and four columns named
    'country_name', 'gold', 'silver', and 'bronze'.  No row index is
    specified, so pandas assigns the default numbered index.
    '''

    # One (country, gold, silver, bronze) record per row of the medal table.
    medal_rows = [
        ('Russian Fed.', 13, 11, 9),
        ('Norway', 11, 5, 10),
        ('Canada', 10, 10, 5),
        ('United States', 9, 7, 12),
        ('Netherlands', 8, 7, 9),
        ('Germany', 8, 6, 5),
        ('Switzerland', 6, 3, 2),
        ('Belarus', 5, 0, 1),
        ('Austria', 4, 8, 5),
        ('France', 4, 4, 7),
        ('Poland', 4, 1, 1),
        ('China', 3, 4, 2),
        ('Korea', 3, 3, 2),
        ('Sweden', 2, 7, 6),
        ('Czech Republic', 2, 4, 2),
        ('Slovenia', 2, 2, 4),
        ('Japan', 1, 4, 3),
        ('Finland', 1, 3, 1),
        ('Great Britain', 1, 1, 2),
        ('Ukraine', 1, 0, 1),
        ('Slovakia', 1, 0, 0),
        ('Italy', 0, 2, 6),
        ('Latvia', 0, 2, 2),
        ('Australia', 0, 2, 1),
        ('Croatia', 0, 1, 0),
        ('Kazakhstan', 0, 0, 1),
    ]

    # Split the records back into one plain list per column.
    country_names = [row[0] for row in medal_rows]
    gold_counts = [row[1] for row in medal_rows]
    silver_counts = [row[2] for row in medal_rows]
    bronze_counts = [row[3] for row in medal_rows]

    table_columns = {}
    table_columns['country_name'] = Series(country_names)
    table_columns['gold'] = Series(gold_counts)
    table_columns['silver'] = Series(silver_counts)
    table_columns['bronze'] = Series(bronze_counts)

    return DataFrame(table_columns)

In [3]:
# Build the medal table once; the cells below read it through the name `df`.
df = create_dataframe()

In [4]:
'''
Select a single column (here the gold medal counts) as a Series
'''
df.gold

0     13
1     11
2     10
3      9
4      8
5      8
6      6
7      5
8      4
9      4
10     4
11     3
12     3
13     2
14     2
15     2
16     1
17     1
18     1
19     1
20     1
21     0
22     0
23     0
24     0
25     0
Name: gold, dtype: int64

In [5]:
'''
Keep only the rows for countries with more than 5 gold medals
'''
df.loc[df.gold > 5]

Unnamed: 0,bronze,country_name,gold,silver
0,9,Russian Fed.,13,11
1,10,Norway,11,5
2,5,Canada,10,10
3,12,United States,9,7
4,9,Netherlands,8,7
5,5,Germany,8,6
6,2,Switzerland,6,3


In [6]:
'''
Keep only the rows for countries with more than 5 golds and more than 7 silvers
'''
df.loc[(df['gold'] > 5) & (df['silver'] > 7)]


Unnamed: 0,bronze,country_name,gold,silver
0,9,Russian Fed.,13,11
2,5,Canada,10,10


In [7]:
def avg_medal_count(olympic_medal_counts_df=None):
    '''
    Return the average number of gold, silver, and bronze medals per country,
    computed with the dataframe's apply method.

    No filtering is applied: every row of the dataframe contributes to the
    averages.  For the Sochi table this is the intended result, because the
    table only lists countries that earned at least one medal.

    Parameters
    ----------
    olympic_medal_counts_df : DataFrame, optional
        Medal table with numeric 'gold', 'silver', and 'bronze' columns.
        When omitted, the notebook-level dataframe `df` is used, which keeps
        existing zero-argument calls working.

    Returns
    -------
    Series
        Indexed by 'gold', 'silver', and 'bronze'; each value is the mean of
        the corresponding column.
    '''
    if olympic_medal_counts_df is None:
        # Backward-compatible fallback to the notebook's shared medal table.
        olympic_medal_counts_df = df

    medal_columns = olympic_medal_counts_df[['gold', 'silver', 'bronze']]

    # apply() calls np.mean once per column, giving one average per medal type.
    return medal_columns.apply(np.mean)

In [8]:
# Bind the result to its own name: reusing `avg_medal_count` would replace the
# function with a Series, and this cell would then fail when run a second time.
avg_medal_counts = avg_medal_count()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(avg_medal_counts)

gold      3.807692
silver    3.730769
bronze    3.807692
dtype: float64


In [9]:
# Rows for countries that won at least one gold medal.
df.loc[df['gold'] >= 1]

Unnamed: 0,bronze,country_name,gold,silver
0,9,Russian Fed.,13,11
1,10,Norway,11,5
2,5,Canada,10,10
3,12,United States,9,7
4,9,Netherlands,8,7
5,5,Germany,8,6
6,2,Switzerland,6,3
7,1,Belarus,5,0
8,5,Austria,4,8
9,7,France,4,4


In [10]:
'''
Find the average number of bronze medals won by countries that won at least 1 gold medal
'''
# A single .loc lookup selects the rows (gold >= 1) and the column ('bronze')
# together instead of chaining two indexing operations.
avg_bronze = np.mean(df.loc[df.gold >= 1, 'bronze'])
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(avg_bronze)

4.2380952381


In [11]:
def numpy_dot(olympic_medal_counts_df=None):
    '''
    Score each country with a point system that awards 4 points for each
    gold medal, 2 points for each silver medal, and 1 point for each bronze
    medal, using the numpy.dot function.

    Parameters
    ----------
    olympic_medal_counts_df : DataFrame, optional
        Medal table with a 'country_name' column and numeric 'gold',
        'silver', and 'bronze' columns.  When omitted, the notebook-level
        dataframe `df` is used, which keeps existing zero-argument calls
        working.

    Returns
    -------
    DataFrame
        A dataframe with:
            a) a column called 'country_name' with the country name
            b) a column called 'points' with the total number of points the
               country earned.
    '''
    if olympic_medal_counts_df is None:
        # Backward-compatible fallback to the notebook's shared medal table.
        olympic_medal_counts_df = df

    points = [4, 2, 1]
    # Same order as `points`: gold, silver, bronze.
    medals = [olympic_medal_counts_df['gold'],
              olympic_medal_counts_df['silver'],
              olympic_medal_counts_df['bronze']]

    # `medals` is treated as a 3 x n_countries matrix, so the dot product with
    # the 3 point weights yields one total per country.
    olympic_points_df = DataFrame(
        {'country_name': olympic_medal_counts_df['country_name'],
         'points': np.dot(points, medals)})

    return olympic_points_df

In [12]:
olympic_points_df = numpy_dot()
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(olympic_points_df)

      country_name  points
0     Russian Fed.      83
1           Norway      64
2           Canada      65
3    United States      62
4      Netherlands      55
5          Germany      49
6      Switzerland      32
7          Belarus      21
8          Austria      37
9           France      31
10          Poland      19
11           China      22
12           Korea      20
13          Sweden      28
14  Czech Republic      18
15        Slovenia      16
16           Japan      15
17         Finland      11
18   Great Britain       8
19         Ukraine       5
20        Slovakia       4
21           Italy      10
22          Latvia       6
23       Australia       5
24         Croatia       2
25      Kazakhstan       1
