Create a list 

In [12]:
# Understanding and mastering Python data structures is crucial for data engineering tasks. Here, we'll cover the main data structures
#  with examples and some cool methods to manipulate data.

# 1. Lists

# Lists are ordered collections of items that are mutable and can contain mixed data types. It supports heterogeneous data types.

In [13]:
# Creating a list
data = [10, 20, 30, 40, 50]

# Accessing elements
print(data[0])  # Output: 10

# Adding elements
data.append(60)
print(data)  # Output: [10, 20, 30, 40, 50, 60]

# Removing elements
data.remove(30)
print(data)  # Output: [10, 20, 40, 50, 60]

# List comprehension
squared_data = [x**2 for x in data] # List comprehension is a concise way to create lists.
                                    # It is a powerful tool to create lists by iterating over an iterable object.
                                    # It is similar to the lambda function in Python and the only difference is that it can be used inside the list.
print(squared_data)  # Output: [100, 400, 1600, 2500, 3600]


10
[10, 20, 30, 40, 50, 60]
[10, 20, 40, 50, 60]
[100, 400, 1600, 2500, 3600]


In [14]:
print(lambda x: x**2, data) # Output: [100, 400, 1600, 2500, 3600]
print(type(lambda x: x**2)) # Output: <class 'function'>

<function <lambda> at 0x1071601f0> [10, 20, 40, 50, 60]
<class 'function'>


In [15]:
mylist = ['a','b','c']

Printing a list

In [16]:
print(mylist)

['a', 'b', 'c']


creating a empty list

In [17]:
my_empty_list = list()

In [18]:
print(my_empty_list)

[]


List can contain multiple datatypes

In [19]:
mylist2 = [5, True, "Hello"] # Since it allows heterogeneous data types, we can have a list with different data types.

In [20]:
print(mylist2)

[5, True, 'Hello']


List allows duplicate elements

In [21]:
mylist3 = [5,5, "True", False, False]

In [22]:
print(mylist3)

[5, 5, 'True', False, False]


Accessing an element (Accessed using the square brackets[])

In [23]:
print(mylist3[1])

5


Accessing index which is not present 

In [24]:
print(mylist3[5])

IndexError: list index out of range

Accessing the negative index

In [None]:
print(mylist3[-1])

False


Iterating through a loop

In [None]:
[i for i in mylist3]

[5, 5, 'True', False, False]

Checking if a element or item is present in the list

In [None]:
print(f"preview of element of mylist3 is {mylist3} ")
if "True" in mylist3:
    print("yes element ""True"" is present")
else:
    print("No, the element ""True"" is not present")

preview of element of mylist3 is [5, 5, 'True', False, False] 
yes element True is present


Check the no of elements in the list

In [None]:
len(mylist3)

5

Append items to the list

In [None]:
mylist3.append("example append")

In [None]:
print(mylist3)

[5, 5, 'True', False, False, 'example append']


Appending items at a index position

In [None]:
mylist3.insert(0, "Apple")

In [None]:
print(mylist3)

['Apple', 5, 5, 'True', False, False, 'example append']


Remove items from the list

In [None]:
item = mylist3.pop() #This pop method will remove the very last element from the list
print(item)

example append


In [None]:
print(mylist3)

['Apple', 5, 5, 'True', False, False]


In [None]:
mylist4 = ['Apple', 5, 5, 'True', False, False]

mylist4.insert(1, "Banana")

print(mylist4)

# The index of previous element will be shifted to the right where as
# pop method will shift the index of the element to the left.

['Apple', 'Banana', 5, 5, 'True', False, False]


We can also remove a element by specifying the exact element  

In [None]:
print(mylist3)
mylist3.remove("Apple")
print(f"Updated list is {mylist3}")

# If any element is not present in the list, it will throw an error.
# if any duplicate element is present, it will remove the first element from the list.

NameError: name 'mylist3' is not defined

what if we remove a element which doesnt exist

In [None]:
mylist3.remove('non_existant_element')

ValueError: list.remove(x): x not in list

Remove all the elements in the list with clear method

In [None]:
mylist3.clear()

In [None]:
print(mylist3)

[]


Reverse the list elements 

In [None]:
mylist2

[5, True, 'Hello']

In [None]:
mylist4 = ["a","b","c"]
mylist4

['a', 'b', 'c']

In [None]:
mylist4.reverse() #reverse the original list and use that list. For every iteration it flips back and forth 
print(mylist4)

['c', 'b', 'a']


In [None]:
mylist4.reverse() #reverse the original list and use that list. For every iteration it flips back and forth 
print(mylist4)

['a', 'b', 'c']


Below code doesnt work since the reverse doesnt work with assignment

In [None]:
mylist4_reversed = mylist4.reverse()
print(mylist4_reversed)

None


Soring a list

In [None]:
mylist5 = [1,3,4,5,2]

In [None]:
mylist5.sort()
mylist5

[1, 2, 3, 4, 5]

In [None]:
mylist6 = [8,4,2,1,0]

In [None]:
mylist6_sorted = sorted(mylist6)

In [None]:
print(mylist6)
print(mylist6_sorted)

[8, 4, 2, 1, 0]
[0, 1, 2, 4, 8]


So we can sort a original list using ".sort" or sorted(list) unlike reverse since it doesnt allow assignment of reversed list

create a new list with multiple similar type of elements

In [None]:
mylist7  = [9] * 10

In [None]:
print(mylist7)

[9, 9, 9, 9, 9, 9, 9, 9, 9, 9]


we can join two lists using the '+' sign like a concatenate function

In [None]:
mylist8 = ["Tom and"]
mylist9 = ["Jerry"]
mylist10 = [2022]

my_final_list_8_9_10 = mylist8 + mylist9 + mylist10 

print(my_final_list_8_9_10)

['Tom and', 'Jerry', 2022]


Slicing lists 

In [None]:
mylist11 = [1,2,3,4,5,6,7,8,9]
# mylist11[start_index : stop_index]

mylist11_modified_1 = mylist[:]
print(f"mylist11_modified_1 is {mylist11_modified_1}") #Prints all the elements

mylist11_modified_2 = mylist11[0:1]
print(f"mylist11_modified_2 is {mylist11_modified_2}") #Takes the zeroth index and takes elements until stopindex - 1

mylist11_modified_3 = mylist11[0:2]
print(f"mylist11_modified_3 is {mylist11_modified_3}") #Takes the zeroth index and takes elements until  stopindex - 1

mylist11_modified_4 = mylist11[:0]
print(f"mylist11_modified_4 is {mylist11_modified_4}") #Since the stop index is the first elements itself and we know stop_index gets excluded

mylist11_modified_5 = mylist11[:1]
print(f"mylist11_modified_5 is {mylist11_modified_5}") #Since the stop index is the first elements itself 

mylist11_modified_6 = mylist11[:-1]
print(f"mylist11_modified_6 is {mylist11_modified_6}") #It will exlude the stop index element which in this case is -1

mylist11_modified_7 = mylist11[-1:]
print(f"mylist11_modified_7 is {mylist11_modified_7}") #takes the start element as the last element and it stops in reverse pattern to print till start

mylist11_modified_8 = mylist11[-1:-1]
print(f"mylist11_modified_8 is {mylist11_modified_8}") #If we give the start and stop of the list as same element it doesnt give anything for -1 

mylist11_modified_9 = mylist11[1:1]
print(f"mylist11_modified_9 is {mylist11_modified_9}") #If we give the start and stop of the list as same element it doesnt give anything for regular index as well

mylist11_modified_10 = mylist11[1:2]
print(f"mylist11_modified_10 is {mylist11_modified_10}") #Regular slicing

mylist11_modified_1 is ['a', 'b', 'c']
mylist11_modified_2 is [1]
mylist11_modified_3 is [1, 2]
mylist11_modified_4 is []
mylist11_modified_5 is [1]
mylist11_modified_6 is [1, 2, 3, 4, 5, 6, 7, 8]
mylist11_modified_7 is [9]
mylist11_modified_8 is []
mylist11_modified_9 is []
mylist11_modified_10 is [2]


we will use a step index in a list
list(start_index: stop_index : step_index)

In [None]:
print(f"main list is {mylist11}")

mylist11_modified_12 = mylist11[1:7:1]
print(f"mylist11_modified_12 is {mylist11_modified_12}") #Prints all the elements beween the start and stop index with step count of 1

mylist11_modified_13 = mylist11[1:7:2]
print(f"mylist11_modified_13 is {mylist11_modified_13}") #Prints all the elements beween the start and stop index with step count of 2 with start index as beginning element

mylist11_modified_14 = mylist11[1:7:-1]
print(f"mylist11_modified_14 is {mylist11_modified_14}") #Prints all the elements beween the start and stop index with step count of -1 

mylist11_modified_15 = mylist11[::-1]
print(f"mylist11_modified_15 is {mylist11_modified_15}") #Prints all the elements in reverse order (Nice way to reverse a list)

main list is [1, 2, 3, 4, 5, 6, 7, 8, 9]
mylist11_modified_12 is [2, 3, 4, 5, 6, 7]
mylist11_modified_13 is [2, 4, 6]
mylist11_modified_14 is []
mylist11_modified_15 is [9, 8, 7, 6, 5, 4, 3, 2, 1]


copy one list to another new list

In [None]:
#Way 1
mylist11_copied  = mylist11.copy()
#Way 2
mylist11_duplicate = list(mylist11)
#Way 3 
mylist11_twin = mylist11[:]

In [None]:
print(mylist11)
print(mylist11_duplicate)
print(mylist11_twin)

[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9]


Operation on each element inside the list



In [None]:
print(mylist11)
mylist12 = [i*i for i in mylist11]
print(mylist12)

[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 4, 9, 16, 25, 36, 49, 64, 81]


In [42]:
# The differece between deep and shallow copy is that deep copy creates a new object and does not reference the original object.
# Shallow copy creates a new object but references the original object.

mylist20 = [[1,2,3],[4,5,6],[7,8,9]]
mylist21 = mylist20.copy() # Both have same memory location
mylist23 = mylist20[:]
mylist24 = list(mylist20)
# The above 3 methods are shallow copy methods
# If we change the original list, the copied list will be affected.
mylist21[0][0] = 25

mylist27 = [[10,20,30],[40,50,60],[70,80,90]]

# Below is the deep copy method
import copy
mylist28 = copy.deepcopy(mylist27) # This is a deep copy which will create a new object and does not reference the original object.
# now if we change the original list, the copied list will not be affected.
mylist28[1][0] = 100

print(mylist20)
print(mylist21) # This is a shallow copy 
print(mylist23)
print(mylist24) 

print(mylist28) # This is a deep copy which will 
print(mylist27) # prinitng the original list after changing the element 



[[25, 2, 3], [4, 5, 6], [7, 8, 9]]
[[25, 2, 3], [4, 5, 6], [7, 8, 9]]
[[25, 2, 3], [4, 5, 6], [7, 8, 9]]
[[25, 2, 3], [4, 5, 6], [7, 8, 9]]
[[10, 20, 30], [100, 50, 60], [70, 80, 90]]
[[10, 20, 30], [40, 50, 60], [70, 80, 90]]
0


In [38]:
#List me all the methods of list

# 1. append() - Adds an element at the end of the list
# 2. clear() - Removes all the elements from the list
# 3. copy() - Returns a copy of the list
# 4. count() - Returns the number of elements with the specified value
# 5. extend() - Add the elements of a list (or any iterable), to the end of the current list
# 6. index() - Returns the index of the first element with the specified value
# 7. insert() - Adds an element at the specified position
# 8. pop() - Removes the element at the specified position
# 9. remove() - Removes the item with the specified value
# 10. reverse() - Reverses the order of the list
# 11. sort() - Sorts the list
