# ITNPBD2: Representing and Manipulating Data
## University of Stirling
## Dr. Kevin Swingler

# Python Dictionaries
- Data structures indexed by named keys
- Basic structure is a collection of key-value pairs (not sorted by key; insertion order is preserved from Python 3.7 onwards)
- Stored as a hashmap for speed of access by key
- Enclosed in `{}` braces

In [25]:
# Module codes (keys) mapped to their titles (values).
modules = {
    'ITNPBD1': 'Mathematics for Big Data',
    'ITNPBD2': 'Representing and Manipulating Data',
    'ITNPBD3': 'Relational and Non-Relational Databases',
}

print(modules)

# Keys and values can be written with either '' or "" quotes; both produce
# ordinary strings. Note that every value here is a string, even '1.0' and '1'.
values = {
    'string': '"This is an example of a string"',
    'float': '1.0',
    'integer': '1',
}

display(values)

{'ITNPBD1': 'Mathematics for Big Data', 'ITNPBD2': 'Representing and Manipulating Data', 'ITNPBD3': 'Relational and Non-Relational Databases'}


{'string': '"This is an example of a string"', 'float': '1.0', 'integer': '1'}

## Access a single entry by its key

In [2]:
# Put the key in square brackets after the dictionary name to get its value.
second_title = modules['ITNPBD2']
third_title = modules['ITNPBD3']
print(second_title)
print(third_title)

# The same lookup, once for each key of the values dictionary.
for type_name in ('string', 'float', 'integer'):
    print(values[type_name])

Representing and Manipulating Data
Relational and Non-Relational Databases
"This is an example of a string"
1.0
1


## List all the keys and values

In [3]:
# .items() yields (key, value) pairs, unpacked here into two loop variables.
for code, title in modules.items():
    print(f"The module {code} is called {title}")

for type_name, example in values.items():
    print(f"Example of a '{type_name}' could be {example}")

The module ITNPBD1 is called Mathematics for Big Data
The module ITNPBD2 is called Representing and Manipulating Data
The module ITNPBD3 is called Relational and Non-Relational Databases
Example of a 'string' could be "This is an example of a string"
Example of a 'float' could be 1.0
Example of a 'integer' could be 1


## Add a new entry with `[ ]`

In [4]:
# Assigning to a key that is not in the dictionary yet creates a new entry.
modules['ITNPBD4'] = 'Scientific and Commercial Applications of Big Data'
values['character'] = "'c'"

# Show every entry of values, including the one just added.
for name, example in values.items():
    print(f"values = key is {name} : value is {example}")

values = key is string : value is "This is an example of a string"
values = key is float : value is 1.0
values = key is integer : value is 1
values = key is character : value is 'c'


## Loop through just the keys

In [5]:
# Iterating over a dictionary directly yields its keys.
for code in modules:
    print(code)
    print(modules[code])  # the key can then be used to look up its value

print("\nPrint just dictionary's keys")
for name in values:
    print(name)

print("\nPrint just dictionary's values")
for name in values:
    print(values[name])

print("\nPrint dictionary's keys and values")
for name in values:
    print(name, values[name], sep=" : ")

    


ITNPBD1
Mathematics for Big Data
ITNPBD2
Representing and Manipulating Data
ITNPBD3
Relational and Non-Relational Databases
ITNPBD4
Scientific and Commercial Applications of Big Data

Print just dictionary's keys
string
float
integer
character

Print just dictionary's values
"This is an example of a string"
1.0
1
'c'

Print dictionary's keys and values
string : "This is an example of a string"
float : 1.0
integer : 1
character : 'c'


## Nesting objects
- Values can be any Python object
- For example lists or dictionaries

In [6]:
# A dictionary value can itself be a list; index the result to reach one element.
modules['ITNPBD6'] = ['Data Mining', 'Machine Learning', 'Data Visualisation']
print(modules['ITNPBD6'])
print(modules['ITNPBD6'][0])

# Replace the string stored under 'string' with a list of words.
values['string'] = [
    "we're", 'adding', 'a', 'list', 'to', 'values',
    'dictionary', 'at', 'key', 'equal', 'string',
]

display(values)

# Print the first four words one element at a time.
words = values['string']
print("\n", words[0], end=' ')
for word in words[1:3]:
    print(word, end=' ')
print(words[3], '\n')

# Print the whole list at once.
print(words, '\n')

# Print every word on one line, separated by spaces.
for word in words:
    print(word, end=' ')

['Data Mining', 'Machine Learning', 'Data Visualisation']
Data Mining


{'string': ["we're",
  'adding',
  'a',
  'list',
  'to',
  'values',
  'dictionary',
  'at',
  'key',
  'equal',
  'string'],
 'float': '1.0',
 'integer': '1',
 'character': "'c'"}


 we're adding a list 

["we're", 'adding', 'a', 'list', 'to', 'values', 'dictionary', 'at', 'key', 'equal', 'string'] 

we're adding a list to values dictionary at key equal string 

In [11]:
# One dictionary describing a single module; the 'topics' value is a list.
bd6_dict = {
    'name': 'Data Analytics',
    'topics': ['Data Mining', 'Machine Learning', 'Data Visualisation'],
    'Lecturer': 'Kevin Swingler',
}
print(bd6_dict)

# A dictionary literal can be spread over several lines.
# Keys are ordinary strings, so they may contain spaces.
that_is_how_you_can_initialize_a_dictionary = {
    'bruh': "what's up",
    'well spoken bruh': ['To', 'put', 'it', 'lightly', 'not', 'much'],
    'dictionary_bruh': "I'm a very inteligent bruh",
}

print(that_is_how_you_can_initialize_a_dictionary)

# Print the words of the nested list on one line, each followed by a space.
print(*that_is_how_you_can_initialize_a_dictionary['well spoken bruh'], end=' ')

{'name': 'Data Analytics', 'topics': ['Data Mining', 'Machine Learning', 'Data Visualisation'], 'Lecturer': 'Kevin Swingler'}
{'bruh': "what's up", 'well spoken bruh': ['To', 'put', 'it', 'lightly', 'not', 'much'], 'dictionary_bruh': "I'm a very inteligent bruh"}
To put it lightly not much 

In [26]:
# Store a whole dictionary as a value, then chain the keys to reach inside it.
modules['ITNPBD6'] = bd6_dict
print(modules['ITNPBD6'])
print(modules['ITNPBD6']['Lecturer'])
print(modules['ITNPBD6']['topics'][1])

# Show the value held under 'integer' before it is overwritten.
print(values['integer'])

# Overwrite the value under 'integer' with a whole dictionary.
values['integer'] = that_is_how_you_can_initialize_a_dictionary
nested = values['integer']

print('\n', nested['bruh'])
phrase = nested['well spoken bruh']
print(phrase)          # the list printed as a whole
for word in phrase:    # the same list printed word by word
    print(word, end=' ')
# Finally the value under the last key, 'dictionary_bruh'.
print('\n', nested['dictionary_bruh'])

{'name': 'Data Analytics', 'topics': ['Data Mining', 'Machine Learning', 'Data Visualisation'], 'Lecturer': 'Kevin Swingler'}
Kevin Swingler
Machine Learning
1

 what's up
['To', 'put', 'it', 'lightly', 'not', 'much']
To put it lightly not much 
 I'm a very inteligent bruh


# JSON
Dictionaries look a lot like JSON, but there are some differences:
- JSON is just a string based data representation, but a dictionary is a data structure
- The key in a dictionary is hashed to aid fast in-memory access
- The key in JSON must be a string. In a dictionary, it can be any hashable type
- Python strings may be written with single or double quotes (and are printed with single quotes by default); JSON requires double quotes
## Main Python methods
- `json.loads()` Produces dictionary from json string
- `json.dumps()` Produces json string from object

In [None]:
import json

# The same record twice: once as JSON text, once as a Python dictionary.
json_string = '{"Name":"Kevin", "Age":50}'
dict_obj = {"Name": "Kevin", "Age": 50}

# loads: JSON text -> Python object; dumps: Python object -> JSON text.
dict_from_string = json.loads(json_string)
string_from_dict = json.dumps(dict_obj)

# Print each object next to its type to show which are str and which are dict.
for obj in (json_string, dict_obj, dict_from_string, string_from_dict):
    print(obj, type(obj))


In [None]:
# Serialise the whole modules dictionary to JSON text, indented two spaces
# per nesting level so that it is easy to read.
mod_str = json.dumps(modules, indent=2)
print(mod_str)

### To do the same with files, not strings, drop the `s`, so `dump` or `load`

In [None]:
# Load the movie records from a JSON file.
# The encoding is given explicitly because open() otherwise falls back to a
# platform-dependent default, which can mis-decode non-ASCII text in the file.
# NOTE: the path is specific to one machine; point it at your own copy of
# MovieData.json before running this cell.
with open('/Users/mateuszzaremba/dev/Python/MovieData.json', encoding='utf-8') as f:
    movies = json.load(f)

print(movies)

## Here we have loaded an array of json objects about movies
- Let's find all the top level keys

In [None]:
# Collect every key that appears at the top level of any movie record.
keyset = set()
for record in movies:
    # Iterating a dict yields its keys, so update() adds all of them at once.
    keyset.update(record)

print(keyset)

In [None]:
# Show the first movie record in full.
first_movie = movies[0]
display(first_movie)

## Pick out a single field

In [None]:
# Index the list for one record, then use the key to pick a single field.
first_movie = movies[0]
print(first_movie['genres'])

## Now pull out all the genres with a similar pattern of code:

In [None]:
# Deliberately failing example: try to collect each movie's genre list in a set.
genres = set()
for mov in movies:
    # Set members must be hashable and lists are not, so add() is expected to
    # raise TypeError on a list value (the 'genres' field is presumably a list).
    genres.add(mov['genres'])  # Won't work - you cannot have a set of lists

print(genres)

## You cannot have a set of mutable lists - must convert to an immutable type, e.g. tuple
- Discuss this for a minute or two!!

In [None]:
# Converting each genre list to a tuple makes it hashable, so this version
# works: the set holds every distinct combination of genres.
genres = {tuple(mov['genres']) for mov in movies}

print(genres)

## Maybe that is not what we really want,
- Try again for a list of genres

In [None]:
# Collect the individual genre names rather than whole genre combinations.
genres = set()
for mov in movies:
    # update() adds each element of the movie's genre list separately.
    genres.update(mov['genres'])

print(genres)

## That is more like it.
- Something more challenging now - find the average rating by genre
- This time we will build a dict of dicts of the form `{genrename: {'num':number of examples, 'rating': av rating}}` 

In [None]:
# Average IMDb rating per genre.
# Pass 1: for each genre, count the rated movies and sum their ratings.
# Pass 2: replace each sum with the mean.
# Result: {genre_name: {'num': number of rated movies, 'rating': average rating}}
genres = {}
for mov in movies:
    # Uncomment to inspect the raw IMDb record: print(mov['imdb'])
    for genre in mov['genres']:
        rating = mov['imdb']['rating']
        # Some movies have no rating. Delete this check first to see the
        # problem it is fixing!
        if rating is None:
            continue
        stats = genres.setdefault(genre, {'num': 0, 'rating': 0.0})
        stats['num'] += 1
        # float(), not int(): int() would truncate 7.8 to 7 and drag every
        # average down.
        stats['rating'] += float(rating)

print(genres)  # counts and rating totals, before averaging

for stats in genres.values():
    stats['rating'] = stats['rating'] / stats['num']
print(genres)  # counts and average ratings

# Sets, lists and dicts Summary
- You can have a list of dicts
- You can have a list of sets
- Both are mutable
- You cannot have a set of mutable objects like lists or dicts
- You can have a set of immutable objects like tuples

In [None]:
# A list may hold dictionaries: lists place no restriction on their elements.
list_of_dicts = [dict(a=1), dict(b=2)]
list_of_dicts

In [None]:
# A list may hold sets too. The repeated 3 in the second set collapses to a
# single element, because sets keep only distinct members.
list_of_sets = [{1, 2, 3}, {3, 3, 4}]
list_of_sets

In [None]:
# Deliberately failing example: dicts are mutable and therefore unhashable,
# so building a set from them is expected to raise TypeError.
set_of_dicts = set(({'a':1},{'b':2}))


In [None]:
# Deliberately failing example: lists are mutable and therefore unhashable,
# so building a set from them is expected to raise TypeError.
set_of_lists = set(([1, 2, 4],[1,2,3]))


In [None]:
# Tuples are immutable and hashable, so they are valid set members.
set_of_tuples = {(1, 2, 4), (1, 2, 3)}
set_of_tuples