In [None]:
import json

<h1><b>Working with JSON strings and Files</b></h1>

In [None]:
# Raw JSON text held in a plain Python str. It is still just text at this point;
# it only becomes a dictionary once it is parsed with json.loads.
json_string = (
  '{"name":["John","Qaiser","Farhan"],'
  '"age":["30","45","23"],'
  '"cars":["Ford", "BMW", "Fiat"]}'
)

In [None]:
# json.loads (note the trailing "s", for "string") parses JSON text held in a str;
# json.load (no "s") is the variant that reads from a file object.
data = json.loads(json_string)

In [None]:
# Show the first name, then confirm that the parsed object is a dict.
print(data['name'][0], type(data), sep="\n")

John
<class 'dict'>


In [None]:
# Add a new key to the parsed dictionary; the original JSON text is not affected.
data.update({"test": "True"})

In [None]:
# The "test" key now exists in the data dictionary, but json_string is unchanged.
print(data, json_string, sep="\n")

{'name': ['John', 'Qaiser', 'Farhan'], 'age': ['30', '45', '23'], 'cars': ['Ford', 'BMW', 'Fiat'], 'test': 'True'}
{"name":["John","Qaiser","Farhan"],"age":["30","45","23"],"cars":["Ford", "BMW", "Fiat"]}


In [None]:
# json.dumps serialises the dictionary back into JSON text.
# The result can be bound to a new name (as here) or assigned over the old JSON string.
# indent=2 pretty-prints the text with two spaces per nesting level; use a larger value for wider indentation.
# sort_keys=True emits the keys in sorted order, as the output shows.
# Important: JSON spells booleans in lowercase (true / false, not True / False).
# The "test" value here is the string "True", so it is written out unchanged.

new_json = json.dumps(data, sort_keys=True, indent=2)
print(new_json)

{
  "age": [
    "30",
    "45",
    "23"
  ],
  "cars": [
    "Ford",
    "BMW",
    "Fiat"
  ],
  "name": [
    "John",
    "Qaiser",
    "Farhan"
  ],
  "test": "True"
}


In [None]:
# Working with a JSON file

# json.load (no trailing "s") parses JSON directly from an open file object.
# The with-statement closes the file when its block ends, so no explicit
# f.close() call is needed afterwards.
with open("/content/data.json", "r") as f:
  parsed = json.load(f)

# The parsed dictionary and its pretty-printed text get separate names, so that
# `data` is not rebound from a dict to a str part-way through the cell.
data = json.dumps(parsed, indent=2, sort_keys=True)
print(data)

{
  "age": [
    "30",
    "45",
    "23"
  ],
  "cars": [
    "Ford",
    "BMW",
    "Fiat"
  ],
  "name": [
    "John",
    "Qaiser",
    "Farhan"
  ],
  "test": "True"
}


In [None]:
# Read the source JSON file into a dictionary ...
with open("/content/data.json", "r") as f:
  data = json.loads(f.read())

# ... then write it out again, pretty-printed with sorted keys, to a second file.
with open("/content/data2.json", "w") as f:
  json.dump(data, f, sort_keys=True, indent=2)

In [None]:
# Load the file once more and show the resulting dictionary.
with open("/content/data.json", "r") as f:
  data = json.loads(f.read())

print(data)

{'age': ['30', '45', '23'], 'cars': ['Ford', 'BMW', 'Fiat'], 'name': ['John', 'Qaiser', 'Farhan'], 'test': 'True'}


<h1><b>Working with Generators</b></h1>

In [None]:
# Generators and iterators are closely related: both hand out their items one at a time.

# An iterator is an object that produces the elements of a sequence on demand (via next()),
# so the results never have to be stored all at once.

# map() is one example of this: it returns a lazy iterator.
x = list(range(1, 11))

# No squares are computed here; each one is produced only when it is requested.
y = map(lambda i: i**2, x)

# This prints the repr of the map object (its type and memory address), not the squared values.
print(y)

# print(y[0]) would raise an error because a map object is not subscriptable.

# list() pulls every value out of the iterator; the values are generated at this point.
print(list(y))

# Re-running this cell builds a brand-new map object each time, so the address printed above may differ between runs.

<map object at 0x7f0707dd9b10>
[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]


In [None]:
# next(y) returns the upcoming element of the iterator; y.__next__() is the equivalent "dunder" method.
y = map(lambda i: i**2, x)

# Consume the first four squares by hand.
for _unused in range(4):
  print(next(y))

# The iterator remembers its position, so the loop below resumes at the fifth square (25) instead of starting again from 1.
print("***********")

# A for loop simply keeps calling next() on the iterator until it is exhausted.
for i in y:
  print(i)

1
4
9
16
***********
25
36
49
64
81
100


In [None]:
# Another way to print the elements of y: call next() by hand until the iterator runs out.

y = map(lambda i: i**2, x)

while True:
  try:
    print(next(y))
  except StopIteration:   # raised by next() once the iterator is exhausted
    # Only StopIteration means "finished"; any other exception is a real error
    # and is allowed to propagate instead of being reported as 'Done'.
    print('Done')
    break

1
4
9
16
25
36
49
64
81
100
Done


In [None]:
x = range(1, 11)
print(x)

# A range is an iterable, not an iterator, so next() rejects it with a TypeError.
# The error is caught and displayed so the demonstration does not stop the
# rest of the notebook from running.
try:
  print(next(x))
except TypeError as err:
  print("TypeError:", err)

range(1, 11)


TypeError: ignored

In [None]:
x = range(1, 11)

# iter() turns the range into an iterator, which next() accepts, so this prints the first value without an error.
# A for loop works the same way: it calls iter() on the object and then keeps asking for the next item in sequence.
x_iterator = iter(x)
print(next(x_iterator))

1


In [None]:
# Example of Generator

def gen(n):
  """Yield the integers 0, 1, ..., n-1 one at a time."""
  # When execution reaches `yield`, the function pauses, hands the current value
  # to the caller and keeps its local state. When the caller asks for the next
  # value, execution resumes right after the `yield`.
  for value in range(n):
    yield value

# Drive the generator with a for loop and print each value it yields.
numbers = gen(5)
for i in numbers:
  print(i)

0
1
2
3
4


In [None]:
# A generator suits cases where only the current item matters, not the ones before or after it.
x = gen(5)

# Pull the first three values by hand.
for _unused in range(3):
  print(next(x))

0
1
2


In [None]:
# Generator use case
# For example you want to find a word in a file that has billions of words, how could you implement this
# First way:
#---> You could read the entire file into the memory and check one by one if the word exists in it. However this is not memory efficient
# Second way:
#---> You read only one line (row) of the file at a time and check whether the word exists in that line; this way it is quite memory efficient
# For the second way refer to tech with tim video of generators, refer the below piece of code

def csv_reader(filename):
  """Yield the lines of `filename` one at a time.

  The file is read lazily, line by line. Each yielded line is exactly what
  iterating over the file object produces, so it keeps its trailing newline.
  """
  # The with-statement closes the file once the last line has been yielded,
  # and also if the generator is closed or garbage-collected before that
  # (for example when the caller breaks out of its loop early).
  with open(filename) as source:
    for row in source:
      yield row

filename = "/content/Accenture Data Science Questions.txt"

# Scan the file line by line and stop at the first line containing the word.
for i in csv_reader(filename):
  if "clustering" not in i:
    continue
  print(i)
  print("Found it")
  break

14.what is clustering and explain various clustering algorithms

Found it


In [None]:
# Generator expressions (sometimes called generator comprehensions)

# They create a generator inline, without defining a function.

x = (n for n in range(10))

# Printing the generator shows the generator object, not its values.
print(x)

# Values are produced only on request.
first_value = next(x)
print(first_value)

<generator object <genexpr> at 0x7f0707da2d50>
0
