# Python Review Quiz

### Introduction

In this lesson, we'll practice writing some Python functions.

### Loading our Data

In [14]:
import pandas as pd

# Read the survey export, using its first column as the row index.
df = pd.read_csv('./mxmh_survey_results.csv', index_col=0)

# Normalise the headers: spaces become underscores, square brackets are
# dropped, and the result is lower-cased.
df.columns = [
    column.translate({ord(' '): '_', ord('['): None, ord(']'): None}).lower()
    for column in df.columns
]

In [18]:
# df

In [17]:
# Columns carried forward into the record dictionaries, in display order.
selected_columns = [
    'age',
    'primary_streaming_service',
    'hours_per_day',
    'while_working',
    'instrumentalist',
    'fav_genre',
    'exploratory',
    'foreign_languages',
    'bpm',
    'anxiety',
    'depression',
    'insomnia',
    'ocd',
    'music_effects',
]

In [25]:
# Narrow the frame to the columns of interest, then turn every row into a
# plain dictionary keyed by column name.
selected_df = df.loc[:, selected_columns]
music_records = selected_df.to_dict(orient='records')

In [26]:
# Preview the first two records to check the dictionary layout.
music_records[:2]

[{'age': 18.0,
  'primary_streaming_service': 'Spotify',
  'hours_per_day': 3.0,
  'while_working': 'Yes',
  'instrumentalist': 'Yes',
  'fav_genre': 'Latin',
  'exploratory': 'Yes',
  'foreign_languages': 'Yes',
  'bpm': 156.0,
  'anxiety': 3.0,
  'depression': 0.0,
  'insomnia': 1.0,
  'ocd': 0.0,
  'music_effects': nan},
 {'age': 63.0,
  'primary_streaming_service': 'Pandora',
  'hours_per_day': 1.5,
  'while_working': 'Yes',
  'instrumentalist': 'No',
  'fav_genre': 'Rock',
  'exploratory': 'Yes',
  'foreign_languages': 'No',
  'bpm': 119.0,
  'anxiety': 7.0,
  'depression': 2.0,
  'insomnia': 2.0,
  'ocd': 1.0,
  'music_effects': nan}]

### Filtering and Mapping

Begin by writing a function that takes a favorite genre and returns only the records with that genre.

> It should return a list of dictionaries containing the matching records. A list comprehension is preferred.

In [33]:
def find_by_genre(music_records, genre):
    """Return the records whose ``'fav_genre'`` equals ``genre``.

    Args:
        music_records: iterable of dictionaries, each with a ``'fav_genre'`` key.
        genre: the genre value to match with ``==``.

    Returns:
        A new list holding the matching dictionaries, in their original order.
    """
    matches = []
    for record in music_records:
        if record['fav_genre'] == genre:
            matches.append(record)
    return matches

In [43]:
# Keep only the records whose favourite genre is 'Rock'.
rock_records = find_by_genre(music_records, genre='Rock')

# rock_records[:2]

# [{'age': 63.0,
#   'primary_streaming_service': 'Pandora',
#   'hours_per_day': 1.5,
#   'while_working': 'Yes',
#   'instrumentalist': 'No',
#   'fav_genre': 'Rock',
#   'exploratory': 'Yes',
#   'foreign_languages': 'No',
#   'bpm': 119.0,
#   'anxiety': 7.0,
#   'depression': 2.0,
#   'insomnia': 2.0,
#   'ocd': 1.0,
#   'music_effects': nan},
#  {'age': 19.0,
#   'primary_streaming_service': 'Spotify',
#   'hours_per_day': 6.0,
#   'while_working': 'Yes',
#   'instrumentalist': 'No',
#   'fav_genre': 'Rock',
#   'exploratory': 'No',
#   'foreign_languages': 'No',
#   'bpm': 94.0,
#   'anxiety': 2.0,
#   'depression': 0.0,
#   'insomnia': 0.0,
#   'ocd': 0.0,
#   'music_effects': 'Improve'}]

Now write a function that returns `True` if all music records have the same genre, and `False` otherwise.

In [38]:
def all_same_genre(music_records):
    """Report whether the records share exactly one ``'fav_genre'`` value.

    Args:
        music_records: iterable of dictionaries, each with a ``'fav_genre'`` key.

    Returns:
        ``True`` when exactly one distinct genre is present, ``False``
        otherwise (including for an empty input, which has zero genres).
    """
    distinct_genres = {record['fav_genre'] for record in music_records}
    return len(distinct_genres) == 1

In [42]:
# rock_records was produced by filtering on 'Rock', so this is expected to be True.
all_same_genre(rock_records)

True

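Next, write a function called `select_attributes` that, given a dictionary, returns a new dictionary containing only the specified attributes. Then write `records_with_selected_attributes`, which, given a list of dictionaries, returns a list of those dictionaries reduced to the specified attributes.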

In [45]:
# Attribute names to keep when trimming each record.
specified_attrs = [
    'age',
    'hours_per_day',
    'fav_genre',
    'bpm',
    'anxiety',
    'depression',
    'insomnia',
]

In [47]:
def select_attributes(record, specified_attrs):
    """Return a copy of ``record`` limited to the keys in ``specified_attrs``.

    Args:
        record: dictionary to trim.
        specified_attrs: container of key names to keep (tested with ``in``).

    Returns:
        A new dictionary holding the kept key/value pairs, in the order they
        appear in ``record``. Names absent from ``record`` are ignored.
    """
    trimmed = {}
    for name, value in record.items():
        if name in specified_attrs:
            trimmed[name] = value
    return trimmed

def records_with_selected_attributes(records, specified_attrs):
    """Apply ``select_attributes`` to every record.

    Args:
        records: iterable of dictionaries.
        specified_attrs: container of key names to keep in each dictionary.

    Returns:
        A new list with one trimmed dictionary per input record, in order.
    """
    trimmed_records = []
    for record in records:
        trimmed_records.append(select_attributes(record, specified_attrs))
    return trimmed_records
        

In [48]:
# Trim every music record down to the attributes listed in specified_attrs.
records_selected_attrs = records_with_selected_attributes(
    records=music_records,
    specified_attrs=specified_attrs,
)

In [50]:
# Show the first two trimmed records.
records_selected_attrs[:2]

[{'age': 18.0,
  'hours_per_day': 3.0,
  'fav_genre': 'Latin',
  'bpm': 156.0,
  'anxiety': 3.0,
  'depression': 0.0,
  'insomnia': 1.0},
 {'age': 63.0,
  'hours_per_day': 1.5,
  'fav_genre': 'Rock',
  'bpm': 119.0,
  'anxiety': 7.0,
  'depression': 2.0,
  'insomnia': 2.0}]

### Data Analysis

Now create a dictionary whose keys are the favorite genres and whose values are the average of a specified attribute for that genre. For example, if we specify `anxiety`, it will provide the average anxiety level associated with each genre, and if we specify `hours_per_day`, it will return the average `hours_per_day` associated with each genre. Round each average to two decimal places.

In [54]:
def histogram_builder(music_records, attribute):
    """Average ``attribute`` per favourite genre, rounded to two decimals.

    Args:
        music_records: iterable of dictionaries, each with a ``'fav_genre'``
            key and an ``attribute`` key holding a number.
        attribute: name of the numeric key to average (e.g. ``'anxiety'``).

    Returns:
        A dictionary mapping each genre found in ``music_records`` to the mean
        of ``attribute`` over that genre's records, rounded with
        ``round(..., 2)``. An empty input yields an empty dictionary.

    Raises:
        KeyError: if a record lacks ``'fav_genre'`` or ``attribute``.
    """
    totals = {}
    counts = {}
    # Group in a single pass instead of re-filtering the full list once per
    # genre. Every key created here has at least one record behind it, so the
    # division below can never be by zero -- which an equality-based lookup
    # would hit for a NaN genre, because NaN != NaN.
    for record in music_records:
        genre = record['fav_genre']
        # Values are added in record order starting from 0, the same order a
        # per-genre ``sum`` would use, so float results are unchanged.
        totals[genre] = totals.get(genre, 0) + record[attribute]
        counts[genre] = counts.get(genre, 0) + 1
    return {genre: round(totals[genre] / counts[genre], 2) for genre in totals}
        

# Build one per-genre average table for each attribute of interest.
anxiety_avg, depression_avg, hours_per_day_avg = (
    histogram_builder(music_records, attribute)
    for attribute in ('anxiety', 'depression', 'hours_per_day')
)

In [56]:
# Print the per-genre anxiety averages on a single line.
print(anxiety_avg)

# {'Lofi': 6.1, 'K pop': 6.23, 'Country': 5.4, 'Metal': 5.76, 'Classical': 4.89, 'Video game music': 5.89, 'Rap': 5.09, 'Hip hop': 6.2, 'Jazz': 5.9, 'EDM': 5.49, 'Latin': 4.33, 'Folk': 6.57, 'Gospel': 4.83, 'Rock': 6.12, 'Pop': 6.07, 'R&B': 5.17}

{'Lofi': 6.1, 'K pop': 6.23, 'Country': 5.4, 'Metal': 5.76, 'Classical': 4.89, 'Video game music': 5.89, 'Rap': 5.09, 'Hip hop': 6.2, 'Jazz': 5.9, 'EDM': 5.49, 'Latin': 4.33, 'Folk': 6.57, 'Gospel': 4.83, 'Rock': 6.12, 'Pop': 6.07, 'R&B': 5.17}


So these are our three histograms.

In [60]:
# depression_avg

In [62]:
# Hard-coded per-genre averages. These rebind the names previously assigned
# from histogram_builder, so the following cells work without the CSV.
# The anxiety values are the same ones shown in the printed output above.
anxiety_avg = {'Lofi': 6.1, 'K pop': 6.23, 'Country': 5.4,
               'Metal': 5.76, 'Classical': 4.89, 'Video game music': 5.89, 'Rap': 5.09,
               'Hip hop': 6.2, 'Jazz': 5.9, 'EDM': 5.49,
               'Latin': 4.33, 'Folk': 6.57, 'Gospel': 4.83,
               'Rock': 6.12, 'Pop': 6.07, 'R&B': 5.17}
# Per-genre depression averages (presumably recorded from the same run -- TODO confirm).
depression_avg = {'Lofi': 6.6, 'K pop': 4.42, 'Country': 4.32,
 'Metal': 5.07, 'Classical': 4.08, 'Video game music': 4.48, 'Rap': 4.0,
 'Hip hop': 5.8, 'Jazz': 4.5, 'EDM': 5.24, 'Latin': 3.0, 'Folk': 5.07, 'Gospel': 2.67,
 'Rock': 5.24, 'Pop': 4.49, 'R&B': 3.83}

# Per-genre hours-per-day averages (presumably recorded from the same run -- TODO confirm).
hours_per_day_avg = {'Lofi': 4.2, 'K pop': 4.0, 'Country': 3.42,
 'Metal': 3.6, 'Classical': 2.88, 'Video game music': 2.91,
 'Rap': 5.32, 'Hip hop': 3.93, 'Jazz': 5.42, 'EDM': 4.59,
 'Latin': 6.67, 'Folk': 3.24, 'Gospel': 2.38, 'Rock': 3.63,
 'Pop': 2.91, 'R&B': 3.47}

Now let's create a nested list where each inner element represents a different genre: its first value is the genre name, the second is the associated `anxiety_avg`, the third is the associated `depression_avg`, and the last is the associated `hours_per_day_avg`.

> You can see the result below.

In [77]:
avgs = [anxiety_avg, depression_avg, hours_per_day_avg]

# Order every table by genre name so the three line up position by position.
sorted_avgs = []
for avg in avgs:
    sorted_avgs.append(sorted(avg.items(), key=lambda pair: pair[0]))

# Genre names come from the first (anxiety) table.
sorted_genres = [pair[0] for pair in sorted_avgs[0]]

# One list of averages per table, in the same genre order.
genre_values = []
for sorted_avg in sorted_avgs:
    genre_values.append([pair[1] for pair in sorted_avg])

# Stitch name + three averages together into one tuple per genre.
list(zip(sorted_genres, *genre_values))

# [('Classical', 4.89, 4.08, 2.88),
#  ('Country', 5.4, 4.32, 3.42),
#  ('EDM', 5.49, 5.24, 4.59),
#  ('Folk', 6.57, 5.07, 3.24),
#  ('Gospel', 4.83, 2.67, 2.38),
#  ('Hip hop', 6.2, 5.8, 3.93),
#  ('Jazz', 5.9, 4.5, 5.42),
#  ('K pop', 6.23, 4.42, 4.0),
#  ('Latin', 4.33, 3.0, 6.67),
#  ('Lofi', 6.1, 6.6, 4.2),
#  ('Metal', 5.76, 5.07, 3.6),
#  ('Pop', 6.07, 4.49, 2.91),
#  ('R&B', 5.17, 3.83, 3.47),
#  ('Rap', 5.09, 4.0, 5.32),
#  ('Rock', 6.12, 5.24, 3.63),
#  ('Video game music', 5.89, 4.48, 2.91)]

[('Classical', 4.89, 4.08, 2.88),
 ('Country', 5.4, 4.32, 3.42),
 ('EDM', 5.49, 5.24, 4.59),
 ('Folk', 6.57, 5.07, 3.24),
 ('Gospel', 4.83, 2.67, 2.38),
 ('Hip hop', 6.2, 5.8, 3.93),
 ('Jazz', 5.9, 4.5, 5.42),
 ('K pop', 6.23, 4.42, 4.0),
 ('Latin', 4.33, 3.0, 6.67),
 ('Lofi', 6.1, 6.6, 4.2),
 ('Metal', 5.76, 5.07, 3.6),
 ('Pop', 6.07, 4.49, 2.91),
 ('R&B', 5.17, 3.83, 3.47),
 ('Rap', 5.09, 4.0, 5.32),
 ('Rock', 6.12, 5.24, 3.63),
 ('Video game music', 5.89, 4.48, 2.91)]

Finally, turn our resulting `genre_avgs` into a dictionary where the keys are the names of the genres, and each value is a *tuple* of the averages above (as shown in the output below).

In [80]:
# Each row is (genre, anxiety average, depression average, hours-per-day average).
genre_avgs = [('Classical', 4.89, 4.08, 2.88),
 ('Country', 5.4, 4.32, 3.42),
 ('EDM', 5.49, 5.24, 4.59),
 ('Folk', 6.57, 5.07, 3.24),
 ('Gospel', 4.83, 2.67, 2.38),
 ('Hip hop', 6.2, 5.8, 3.93),
 ('Jazz', 5.9, 4.5, 5.42),
 ('K pop', 6.23, 4.42, 4.0),
 ('Latin', 4.33, 3.0, 6.67),
 ('Lofi', 6.1, 6.6, 4.2),
 ('Metal', 5.76, 5.07, 3.6),
 ('Pop', 6.07, 4.49, 2.91),
 ('R&B', 5.17, 3.83, 3.47),
 ('Rap', 5.09, 4.0, 5.32),
 ('Rock', 6.12, 5.24, 3.63),
 ('Video game music', 5.89, 4.48, 2.91)]

def build_avg_histogram(genre_avgs):
    """Map each genre name to the averages that follow it in its row.

    Args:
        genre_avgs: iterable of sequences whose first item is the genre name
            and whose remaining items are that genre's averages.

    Returns:
        A dictionary keyed by genre name. Each value is the row with its first
        item sliced off, so it has the same sequence type as the row (tuples
        in, tuples out). If a genre appears more than once, the last row wins.
    """
    # The dictionary must be returned: a bare expression on the last line of
    # a function body is evaluated and discarded.
    return {genre_avg[0]: genre_avg[1:] for genre_avg in genre_avgs}


# Build the mapping at cell level so the name exists outside the function,
# then display it as the cell's output.
avg_histogram = build_avg_histogram(genre_avgs)
avg_histogram

# {'Classical': (4.89, 4.08, 2.88),
#  'Country': (5.4, 4.32, 3.42),
#  'EDM': (5.49, 5.24, 4.59),
#  'Folk': (6.57, 5.07, 3.24),
#  'Gospel': (4.83, 2.67, 2.38),
#  'Hip hop': (6.2, 5.8, 3.93),
#  'Jazz': (5.9, 4.5, 5.42),
#  'K pop': (6.23, 4.42, 4.0),
#  'Latin': (4.33, 3.0, 6.67),
#  'Lofi': (6.1, 6.6, 4.2),
#  'Metal': (5.76, 5.07, 3.6),
#  'Pop': (6.07, 4.49, 2.91),
#  'R&B': (5.17, 3.83, 3.47),
#  'Rap': (5.09, 4.0, 5.32),
#  'Rock': (6.12, 5.24, 3.63),
#  'Video game music': (5.89, 4.48, 2.91)}

{'Classical': (4.89, 4.08, 2.88),
 'Country': (5.4, 4.32, 3.42),
 'EDM': (5.49, 5.24, 4.59),
 'Folk': (6.57, 5.07, 3.24),
 'Gospel': (4.83, 2.67, 2.38),
 'Hip hop': (6.2, 5.8, 3.93),
 'Jazz': (5.9, 4.5, 5.42),
 'K pop': (6.23, 4.42, 4.0),
 'Latin': (4.33, 3.0, 6.67),
 'Lofi': (6.1, 6.6, 4.2),
 'Metal': (5.76, 5.07, 3.6),
 'Pop': (6.07, 4.49, 2.91),
 'R&B': (5.17, 3.83, 3.47),
 'Rap': (5.09, 4.0, 5.32),
 'Rock': (6.12, 5.24, 3.63),
 'Video game music': (5.89, 4.48, 2.91)}