In [1]:
import pandas as pd
import numpy as np


#  How to extend an iterable to the datastructure 

### List

In [2]:
list1 = [1, 2, 3]
list2 = [4, 5, 6]
list1.extend(list2)
list1

[1, 2, 3, 4, 5, 6]

### np.Array

In [3]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
arr_combined = np.concatenate([arr1, arr2])
arr_combined

array([1, 2, 3, 4, 5, 6])

### Dictionary

In [4]:
dict1 = {'a': 1, 'b': 2}
dict2 = {'c': 3, 'd': 4}
dict1.update(dict2)
dict1

{'a': 1, 'b': 2, 'c': 3, 'd': 4}

### Series

In [5]:
import pandas as pd

s1 = pd.Series([1, 2, 3])
s2 = pd.Series([4, 5, 6])
s3 = pd.concat([s1, s2], ignore_index=True)
s3

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

### Dataframe

In [6]:
import pandas as pd
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})
df3 = pd.concat([df1, df2], ignore_index=True)
df3

Unnamed: 0,A,B
0,1,3
1,2,4
2,5,7
3,6,8


# How to add an item to a data structure

### List

In [7]:
numbers = [1, 2, 3]
# to append the number 4
numbers.append(4)
numbers

[1, 2, 3, 4]

### Array

In [8]:
import numpy as np
arr = np.array([1, 2, 3])
python_list = arr.tolist()
python_list.append(4)
arr = np.array(python_list)
arr

array([1, 2, 3, 4])

### Dictionary- To add a new key,value pair

In [9]:
my_dict={'a':1,'b':2}
my_dict['c']=3
my_dict

{'a': 1, 'b': 2, 'c': 3}

### Dictionary- To add a new value in an existing key

In [10]:
my_dict={'a':1,'b':2}
if not isinstance(my_dict["a"], list):
    my_dict["a"] = [my_dict["a"]]
my_dict["a"].append(3)
my_dict

{'a': [1, 3], 'b': 2}

### Series

In [11]:
import numpy as np
s = pd.Series([1, 2, 3])
python_list = s.tolist()
python_list.append(4)
s = pd.Series(python_list)
s

0    1
1    2
2    3
3    4
dtype: int64

### Dataframe- Adding a new Column

In [12]:
df = pd.DataFrame({
    "names": ["Alice", "Bob"],
    "ages": [25, 30]
})

In [13]:
df["gender"] = ["Female", "Male"]
df

Unnamed: 0,names,ages,gender
0,Alice,25,Female
1,Bob,30,Male


### Dataframe- Adding a new Row

In [14]:
new_row = {"names": "Charlie", "ages": 28, "gender": "Male"}
new_row_df = pd.DataFrame([new_row]) # convert new row into a dataframe
df = pd.concat([df, new_row_df], ignore_index=True) # use concat
df

Unnamed: 0,names,ages,gender
0,Alice,25,Female
1,Bob,30,Male
2,Charlie,28,Male


#  How to insert an item at a position to the datastructure 

### List

In [15]:
lst = [1, 2, 3]
lst.insert(1, 1.5)  # Insert 1.5 at position 1
lst

[1, 1.5, 2, 3]

### np.Array

In [16]:
arr = np.array([1, 2, 3])
arr = np.insert(arr, 1, 1.5)  # Insert 1.5 at position 1
arr

array([1, 1, 2, 3])

### Dictionary - Not Possible

### Series

In [17]:
s_list = s.tolist()
s_list.insert(1, 1.5)
s = pd.Series(s_list)
s

0    1.0
1    1.5
2    2.0
3    3.0
4    4.0
dtype: float64

### Dataframe- Row

In [18]:
import pandas as pd

# Redefining the sample DataFrame, new row, and insertion index

df_sample = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
new_row = pd.DataFrame({'A': [1.5], 'B': [3.5]})
index_to_insert = 1

# Splitting the DataFrame
df_top = df_sample.iloc[:index_to_insert]
df_bottom = df_sample.iloc[index_to_insert:]

# Concatenating the parts with the new row
df_updated = pd.concat([df_top, new_row, df_bottom], ignore_index=True)
df_updated


Unnamed: 0,A,B
0,1.0,3.0
1,1.5,3.5
2,2.0,4.0


### Dataframe: Column

In [19]:
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df.insert(1, 'A.5', [1.5, 2.5])  # Insert a column named 'A.5' at position 1
df

Unnamed: 0,A,A.5,B
0,1,1.5,3
1,2,2.5,4


#  How to Remove an item from the datastructure 

### List

In [20]:
lst = [1, 2, 3, 2]
lst.remove(2)  # This will remove the first occurrence of 2
lst

[1, 3, 2]

### np.Array

In [21]:
arr = np.array([1, 2, 3, 2])
arr = arr[arr != 2]  # This will remove all occurrences of 2
arr

array([1, 3])

In [22]:
arr = np.array([1, 2, 3, 2])
index_to_remove = np.where(arr == 2)[0][0]  # Find the index of the first occurrence of 2
arr = np.delete(arr, index_to_remove)  # Remove the element at that index
arr

array([1, 3, 2])

### Dictionary - Not Possible

In [23]:
dict_example = {'a': 1, 'b': 2, 'c': 3}
del dict_example['b']  # This will remove the key 'b' and its associated value
dict_example

{'a': 1, 'c': 3}

### Series

In [24]:
s = pd.Series([1, 2, 3, 2])
s = s[s != 2]  # This will remove all occurrences of 2
s

0    1
2    3
dtype: int64

In [25]:
s = pd.Series([1, 2, 3, 2])
# Removing the first occurrence of 2 in the Series
index_to_remove = (s == 2).idxmax()
s_first_removed = s.drop(index_to_remove)

s_first_removed

0    1
2    3
3    2
dtype: int64

### Dataframe- Row

In [26]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df = df[df['A'] != 2]
df

Unnamed: 0,A,B
0,1,4
2,3,6


### Dataframe: Column

In [27]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df.drop('A', axis=1, inplace=True)
df

Unnamed: 0,B
0,4
1,5
2,6


#  How to Clear the items from the datastructure 

### List

In [28]:
lst = [1, 2, 3]
lst.clear()
lst

[]

### np.Array

In [29]:
arr = np.array([1, 2, 3])
arr = np.array([])
arr

array([], dtype=float64)

### Dictionary - Not Possible

In [30]:
dict_example = {'a': 1, 'b': 2}
dict_example.clear()
dict_example

{}

### Series

In [31]:
s = pd.Series([1, 2, 3])
s = pd.Series([], dtype='object')
s

Series([], dtype: object)

### Dataframe- Row

In [32]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df = df[df['A'] != 2]
df

Unnamed: 0,A,B
0,1,4
2,3,6


### Dataframe

In [33]:
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df = pd.DataFrame()
df

#  How to Return the index of the first item from the datastructure 

### List

In [34]:
lst = [1, 2, 3, 2]
idx = lst.index(2)  # This will return the index of the first occurrence of 2
idx

1

### np.Array

In [35]:
arr = np.array([1, 2, 3, 2])
idx = np.where(arr == 2)[0][0]
idx

1

### Dictionary

In [36]:
dict_example = {'a': 1, 'b': 2, 'c': 3}
key_for_value = next(key for key, value in dict_example.items() if value == 2)
key_for_value

'b'

### Series

In [37]:
s = pd.Series([1, 2, 3, 2])
idx = (s == 2).idxmax()
idx

1

### Dataframe

In [38]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
idx = (df['A'] == 2).idxmax()
idx

1

#  How to Count the items from the datastructure 

### List

In [39]:
lst = [1, 2, 2, 3]
count = lst.count(2)  # This will return 2 because 2 appears twice
count

2

### np.Array

In [40]:
arr = np.array([1, 2, 2, 3])
count = np.count_nonzero(arr == 2)
count

2

### Dictionary

In [41]:
dict_example = {'a': 1, 'b': 2, 'c': 2}
count = list(dict_example.values()).count(2)
count

2

### Series

In [42]:
s = pd.Series([1, 2, 2, 3])
count = s.value_counts().get(2, 0)
count

2

### Dataframe

In [43]:
#for a specific column
df = pd.DataFrame({'A': [1, 2, 2], 'B': [4, 5, 6]})
count = df['A'].value_counts().get(2, 0)
count

2

In [44]:
# for the entire dataframe
count = (df == 2).sum().sum()
count

2

#  How to Sort the items in the datastructure 

### List

In [45]:
lst = [3, 1, 2]
lst.sort()  # Sorts in ascending order
lst

[1, 2, 3]

### np.Array

In [46]:
arr = np.array([3, 1, 2])
sorted_arr = np.sort(arr)
sorted_arr

array([1, 2, 3])

### Dictionary

In [47]:
dict_example = {'b': 2, 'a': 1, 'c': 3}
sorted_keys = sorted(dict_example.keys())
sorted_items = sorted(dict_example.items(), key=lambda x: x[1])  # Sort by value
print('sorted_keys',sorted_keys)
print('sorted_items',sorted_items)

sorted_keys ['a', 'b', 'c']
sorted_items [('a', 1), ('b', 2), ('c', 3)]


### Series

In [48]:
s = pd.Series([3, 1, 2])
sorted_s = s.sort_values()
sorted_s

1    1
2    2
0    3
dtype: int64

### Dataframe

In [49]:
df = pd.DataFrame({'A': [3, 1, 2], 'B': [2, 3, 1]})
sorted_df = df.sort_values(by='A')
sorted_df

Unnamed: 0,A,B
1,1,3
2,2,1
0,3,2


#  How to Reverse the items in the datastructure 

### List

In [50]:
lst = [1, 2, 3]
lst.reverse()
lst

[3, 2, 1]

### np.Array

In [51]:
arr = np.array([1, 2, 3])
reversed_arr = np.flip(arr)
reversed_arr

array([3, 2, 1])

### Dictionary

In [52]:
dict_example = {'a': 1, 'b': 2, 'c': 3}
reversed_dict = dict(reversed(dict_example.items()))
reversed_dict

{'c': 3, 'b': 2, 'a': 1}

### Series

In [53]:
s = pd.Series([1, 2, 3])
reversed_s = s.iloc[::-1]
reversed_s

2    3
1    2
0    1
dtype: int64

### Dataframe

In [54]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
reversed_df = df.iloc[::-1]
reversed_df

Unnamed: 0,A,B
2,3,6
1,2,5
0,1,4


#  How to Convert one to Another Data Structure

### List to Dictionary

In [55]:
lst = [('a', 1), ('b', 2)]
dict_obj = dict(lst)
dict_obj

{'a': 1, 'b': 2}

### Dictionary to List

In [56]:
dict_obj = {'a': 1, 'b': 2}
lst_keys = list(dict_obj.keys())
lst_values = list(dict_obj.values())
lst_keys,lst_values

(['a', 'b'], [1, 2])

### List to np.array

In [57]:
import numpy as np
lst = [1, 2, 3]
arr = np.array(lst)
arr

array([1, 2, 3])

### np.array to List

In [58]:
arr = np.array([1, 2, 3])
lst = arr.tolist()
lst


[1, 2, 3]

### List to Series

In [59]:
import pandas as pd
lst = [1, 2, 3]
series_obj = pd.Series(lst)
series_obj

0    1
1    2
2    3
dtype: int64

### Series to List

In [60]:
series_obj = pd.Series([1, 2, 3])
lst = series_obj.tolist()
lst

[1, 2, 3]

### Dictiionary to Series

In [61]:
dict_obj = {'a': 1, 'b': 2}
series_obj = pd.Series(dict_obj)
series_obj

a    1
b    2
dtype: int64

### Series to Dictionary

In [62]:
series_obj = pd.Series({'a': 1, 'b': 2})
dict_obj = series_obj.to_dict()
dict_obj

{'a': 1, 'b': 2}

### np.array to Series

In [63]:
arr = np.array([1, 2, 3])
series_obj = pd.Series(arr)
series_obj

0    1
1    2
2    3
dtype: int32

### Series to np.array

In [64]:
series_obj = pd.Series([1, 2, 3])
arr = series_obj.values
arr

array([1, 2, 3], dtype=int64)

### Dictonary to Dataframe

In [65]:
dict_obj = {'col1': [1, 2], 'col2': [3, 4]}
df = pd.DataFrame(dict_obj)
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


###  Dataframe to Dictionary

In [66]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
dict_obj = df.to_dict()
dict_obj

{'col1': {0: 1, 1: 2}, 'col2': {0: 3, 1: 4}}

###  List of Dictionary to dataframe

In [67]:
lst_of_dicts = [{'col1': 1, 'col2': 3}, {'col1': 2, 'col2': 4}]
df = pd.DataFrame(lst_of_dicts)
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


###  Dataframe to List of Dictionaries

In [68]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
lst_of_dicts = df.to_dict('records')
lst_of_dicts

[{'col1': 1, 'col2': 3}, {'col1': 2, 'col2': 4}]

###  np.array to dataframe

In [69]:
arr = np.array([[1, 3], [2, 4]])
df = pd.DataFrame(arr, columns=['col1', 'col2'])
df

Unnamed: 0,col1,col2
0,1,3
1,2,4


###  Dataframe to np.array

In [70]:
df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
arr = df.values
arr

array([[1, 3],
       [2, 4]], dtype=int64)

###  Series to np.array

In [71]:
series_obj = pd.Series([1, 2, 3])
df = series_obj.to_frame()
df

Unnamed: 0,0
0,1
1,2
2,3


###  Series to Dataframe

In [72]:
series_obj = pd.Series([1, 2, 3])
df = series_obj.to_frame()
df

Unnamed: 0,0
0,1
1,2
2,3


###  Dataframe (Single Column) to series

In [73]:
df = pd.DataFrame({'col1': [1, 2, 3]})
series_obj = df['col1']
series_obj

0    1
1    2
2    3
Name: col1, dtype: int64

###  Single List to dataframe

In [74]:
import pandas as pd
lst = [1, 2, 3, 4, 5]
df = pd.DataFrame(lst, columns=['column_name'])
df

Unnamed: 0,column_name
0,1
1,2
2,3
3,4
4,5


###  List of Lists to dataframe

In [75]:
lst_of_lists = [[1, 'A'], [2, 'B'], [3, 'C']]
df = pd.DataFrame(lst_of_lists, columns=['col1', 'col2'])
df

Unnamed: 0,col1,col2
0,1,A
1,2,B
2,3,C


###  Dataframe (Single Column) to list

In [76]:
df = pd.DataFrame({'col1': [1, 2, 3]})
lst = df['col1'].tolist()
lst

[1, 2, 3]

### Entire Dataframe to list of lists

In [77]:
df = pd.DataFrame({'col1': [1, 2], 'col2': ['A', 'B']})
lst_of_lists = df.values.tolist()
lst_of_lists

[[1, 'A'], [2, 'B']]

## How to get the position of an element in a data structure
### List

In [78]:
lst = [10, 20, 30, 40, 50]
position = lst.index(30)  # returns 2
position

2

### Numpy Array

In [79]:
import numpy as np
arr = np.array([10, 20, 30, 40, 50])
positions = np.where(arr == 30)  # returns (array([2]),)
positions

(array([2], dtype=int64),)

### Dictionary

In [80]:
dct = {'a': 10, 'b': 20, 'c': 30}
keys_list = list(dct.keys())
position = keys_list.index('c')  # returns 2
position

2

### Series

In [81]:
import pandas as pd
s = pd.Series([10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'])
position = s.index.get_loc('c')  # returns 2
position

2

### Dataframe: Row position

In [82]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['x', 'y', 'z'])
row_position = df.index.get_loc('y')  # returns 1
row_position

1

### Dataframe: Column Position

In [83]:
col_position = df.columns.get_loc('B')  # returns 1
col_position

1

## How to iterate through the elements in a data structure
### List

In [84]:
lst = [10, 20, 30, 40, 50]
for item in lst:
    print(item)

10
20
30
40
50


### np.array

In [85]:
import numpy as np
arr = np.array([10, 20, 30, 40, 50])
for item in arr:
    print(item)

10
20
30
40
50


### Dataframe: Column Position

In [86]:
col_position = df.columns.get_loc('B')  # returns 1
col_position

1

### Dictionary: For Keys

In [87]:
dct = {'a': 10, 'b': 20, 'c': 30}
for key in dct:
    print(key)

a
b
c


### Dictionary: For values

In [88]:
for value in dct.values():
    print(value)

10
20
30


### Dictionary: for both keys and values

In [89]:
for key, value in dct.items():
    print(key, value)

a 10
b 20
c 30


### Series

In [90]:
import pandas as pd
s = pd.Series([10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'])
for index, value in s.items():
    print(index, value)


a 10
b 20
c 30
d 40
e 50


### Dataframe: for Each Column

In [91]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
for column, data in df.items():
    print(column)
    print(data)

A
0    1
1    2
2    3
Name: A, dtype: int64
B
0    4
1    5
2    6
Name: B, dtype: int64


### Dataframe: for each row

In [92]:
for index, row in df.iterrows():
    print(index)
    print(row)

0
A    1
B    4
Name: 0, dtype: int64
1
A    2
B    5
Name: 1, dtype: int64
2
A    3
B    6
Name: 2, dtype: int64


## Calculating union, intersection, difference, subset,superset, complement, symmetric difference and cartesian product for Data Structures
### List

In [93]:
# Sample lists
list1 = [1, 2, 3, 4]
list2 = [3, 4, 5, 6]
print('list1',list1)
print('list2',list2)

# Convert lists to sets for set operations
set1 = set(list1)
set2 = set(list2)

# Union
union_result = list(set1 | set2)

# Intersection
intersection_result = list(set1 & set2)

# Difference (elements in list1 but not in list2)
difference_list1 = list(set1 - set2)

# Difference (elements in list2 but not in list1)
difference_list2 = list(set2 - set1)

# Subset (check if list1 is a subset of list2)
is_subset = set1 <= set2

# Superset (check if list1 is a superset of list2)
is_superset = set1 >= set2

# Complement (relative to the union of the two lists)
universal_set = set1 | set2
complement_list1 = list(universal_set - set1)
complement_list2 = list(universal_set - set2)

# Symmetric Difference
symmetric_diff = list(set1 ^ set2)

# Cartesian Product
cartesian_product = [(a, b) for a in list1 for b in list2]

print('union:',union_result)
print('Intersection:',intersection_result)
print('Difference list1:', difference_list1)
print('Difference list2:', difference_list2)
print('Subset:', is_subset)
print('Superset:', is_superset)
print('Complement list1:', complement_list1)
print('complement list2:', complement_list2)
print('Symmetric Difference:', symmetric_diff)
print('Cartesian Product:', cartesian_product)

#union_result, intersection_result, difference_list1, difference_list2, is_subset, is_superset, complement_list1, complement_list2, symmetric_diff, cartesian_product


list1 [1, 2, 3, 4]
list2 [3, 4, 5, 6]
union: [1, 2, 3, 4, 5, 6]
Intersection: [3, 4]
Difference list1: [1, 2]
Difference list2: [5, 6]
Subset: False
Superset: False
Complement list1: [5, 6]
complement list2: [1, 2]
Symmetric Difference: [1, 2, 5, 6]
Cartesian Product: [(1, 3), (1, 4), (1, 5), (1, 6), (2, 3), (2, 4), (2, 5), (2, 6), (3, 3), (3, 4), (3, 5), (3, 6), (4, 3), (4, 4), (4, 5), (4, 6)]


### np.array

In [94]:
import numpy as np

# Sample arrays
arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([3, 4, 5, 6])
print('arr1:',arr1)
print('arr2:',arr2)

# Union
union_result = np.union1d(arr1, arr2)

# Intersection
intersection_result = np.intersect1d(arr1, arr2)

# Difference (elements in arr1 but not in arr2)
difference_arr1 = np.setdiff1d(arr1, arr2)

# Difference (elements in arr2 but not in arr1)
difference_arr2 = np.setdiff1d(arr2, arr1)

# Subset (check if arr1 is a subset of arr2)
is_subset = np.array_equal(arr1, np.intersect1d(arr1, arr2))

# Superset (check if arr1 is a superset of arr2)
is_superset = np.array_equal(arr2, np.intersect1d(arr1, arr2))

# Complement (relative to the union of the two arrays)
universal_array = np.union1d(arr1, arr2)
complement_arr1 = np.setdiff1d(universal_array, arr1)
complement_arr2 = np.setdiff1d(universal_array, arr2)

# Symmetric Difference
symmetric_diff = np.setxor1d(arr1, arr2)

# Cartesian Product
cartesian_product = np.transpose([np.tile(arr1, len(arr2)), np.repeat(arr2, len(arr1))])

print('union:',union_result)
print('Intersection:', intersection_result)
print('Difference arr1:',difference_arr1)
print('Difference arr2:', difference_arr2)
print('Subset:', is_subset)
print('Superset:', is_superset)
print('Complement arr1:', complement_arr1)
print('complement arr2:', complement_arr2)
print('Symmetric Difference:', symmetric_diff)
print('Cartesian Product:', cartesian_product)

#union_result, intersection_result, difference_arr1, difference_arr2, is_subset, is_superset, complement_arr1, complement_arr2, symmetric_diff, cartesian_product


arr1: [1 2 3 4]
arr2: [3 4 5 6]
union: [1 2 3 4 5 6]
Intersection: [3 4]
Difference arr1: [1 2]
Difference arr2: [5 6]
Subset: False
Superset: False
Complement arr1: [5 6]
complement arr2: [1 2]
Symmetric Difference: [1 2 5 6]
Cartesian Product: [[1 3]
 [2 3]
 [3 3]
 [4 3]
 [1 4]
 [2 4]
 [3 4]
 [4 4]
 [1 5]
 [2 5]
 [3 5]
 [4 5]
 [1 6]
 [2 6]
 [3 6]
 [4 6]]


### Dataframe

In [95]:
import pandas as pd
# Sample dataframes
df1 = pd.DataFrame({
    'A': [1, 2, 3],
    'B': ['a', 'b', 'c']
})
print('df1:\n',df1)

df2 = pd.DataFrame({
    'A': [3, 4],
    'B': ['c', 'd']
})
print('df2:\n',df2)
print('---------------')
print('---------------')
# Union
union_df = pd.concat([df1, df2]).drop_duplicates().reset_index(drop=True)

# Intersection
intersection_df = df1.merge(df2, how='inner')

# Difference (rows in df1 but not in df2)
difference_df1 = df1.merge(df2, how='outer', indicator=True).loc[lambda x : x['_merge']=='left_only'].drop('_merge', axis=1)

# Difference (rows in df2 but not in df1)
difference_df2 = df2.merge(df1, how='outer', indicator=True).loc[lambda x : x['_merge']=='left_only'].drop('_merge', axis=1)

# Subset (check if df1 is a subset of df2)
is_subset = difference_df1.empty

# Superset (check if df1 is a superset of df2)
is_superset = difference_df2.empty

# Complement (relative to the union of the two dataframes)
universal_df = pd.concat([df1, df2]).drop_duplicates().reset_index(drop=True)
complement_df1 = universal_df.merge(df1, how='outer', indicator=True).loc[lambda x : x['_merge']=='left_only'].drop('_merge', axis=1)
complement_df2 = universal_df.merge(df2, how='outer', indicator=True).loc[lambda x : x['_merge']=='left_only'].drop('_merge', axis=1)

# Symmetric Difference
symmetric_diff = pd.concat([difference_df1, difference_df2]).reset_index(drop=True)

# Cartesian Product (Cross Join)
df1['key'] = 1
df2['key'] = 1
cartesian_product = pd.merge(df1, df2, on='key').drop('key', axis=1)

print('union:\n',union_df)
print('---------------')
print('Intersection:\n',intersection_df)
print('---------------')
print('Difference df1:\n',difference_df1)
print('---------------')
print('Difference df2:\n',difference_df2)
print('---------------')
print('Subset:',is_subset)
print('---------------')
print('Superset:\n',is_superset)
print('---------------')
print('Complement df1:\n',complement_df1)
print('---------------')
print('complement df2:\n',complement_df2)
print('---------------')
print('Symmetric Difference:\n',symmetric_diff)
print('---------------')
print('Cartesian Product:\n',cartesian_product)
print('---------------')

#union_df, intersection_df, difference_df1, difference_df2, is_subset, is_superset, complement_df1, complement_df2, symmetric_diff, cartesian_product


df1:
    A  B
0  1  a
1  2  b
2  3  c
df2:
    A  B
0  3  c
1  4  d
---------------
---------------
union:
    A  B
0  1  a
1  2  b
2  3  c
3  4  d
---------------
Intersection:
    A  B
0  3  c
---------------
Difference df1:
    A  B
0  1  a
1  2  b
---------------
Difference df2:
    A  B
1  4  d
---------------
Subset: False
---------------
Superset:
 False
---------------
Complement df1:
    A  B
3  4  d
---------------
complement df2:
    A  B
0  1  a
1  2  b
---------------
Symmetric Difference:
    A  B
0  1  a
1  2  b
2  4  d
---------------
Cartesian Product:
    A_x B_x  A_y B_y
0    1   a    3   c
1    1   a    4   d
2    2   b    3   c
3    2   b    4   d
4    3   c    3   c
5    3   c    4   d
---------------


### Dictionary

In [96]:
# Redefining the sample dictionaries
dict1 = {'a': 1, 'b': 2, 'c': 3}
dict2 = {'c': 30, 'd': 4}
print('dict1:',dict1)
print('dict2:',dict2)

# Recalculating the operations

# Union
union_dict = {**dict1, **dict2}

# Intersection
intersection_keys = dict1.keys() & dict2.keys()
intersection_dict = {k: dict2[k] for k in intersection_keys}

# Difference (keys in dict1 but not in dict2)
difference_dict1 = {k: dict1[k] for k in (dict1.keys() - dict2.keys())}

# Difference (keys in dict2 but not in dict1)
difference_dict2 = {k: dict2[k] for k in (dict2.keys() - dict1.keys())}

# Subset (check if dict1 is a subset of dict2 based on keys)
is_subset = dict1.keys() <= dict2.keys()

# Superset (check if dict1 is a superset of dict2 based on keys)
is_superset = dict1.keys() >= dict2.keys()

# Complement (relative to the union of the two dictionaries)
universal_keys = dict1.keys() | dict2.keys()
complement_dict1 = {k: union_dict[k] for k in (universal_keys - dict1.keys())}
complement_dict2 = {k: union_dict[k] for k in (universal_keys - dict2.keys())}

# Symmetric Difference (keys present in one dictionary but not in both)
symmetric_diff_keys = dict1.keys() ^ dict2.keys()
symmetric_diff = {k: union_dict[k] for k in symmetric_diff_keys}

# Cartesian Product (Not typical for dictionaries, but possible)
cartesian_product = [{'key1': k1, 'value1': v1, 'key2': k2, 'value2': v2} for k1, v1 in dict1.items() for k2, v2 in dict2.items()]

print('union:',union_dict)
print('Intersection:',intersection_dict)
print('Difference dict1:',difference_dict1)
print('Difference dict2:',difference_dict2)
print('Subset:',is_subset)
print('Superset:',is_superset)
print('Complement dict1:',complement_dict1)
print('complement dict2:',complement_dict2)
print('Symmetric Difference:',symmetric_diff)
print('Cartesian Product:',cartesian_product)


#union_dict, intersection_dict, difference_dict1, difference_dict2, is_subset, is_superset, complement_dict1, complement_dict2, symmetric_diff, cartesian_product


dict1: {'a': 1, 'b': 2, 'c': 3}
dict2: {'c': 30, 'd': 4}
union: {'a': 1, 'b': 2, 'c': 30, 'd': 4}
Intersection: {'c': 30}
Difference dict1: {'b': 2, 'a': 1}
Difference dict2: {'d': 4}
Subset: False
Superset: False
Complement dict1: {'d': 4}
complement dict2: {'b': 2, 'a': 1}
Symmetric Difference: {'d': 4, 'b': 2, 'a': 1}
Cartesian Product: [{'key1': 'a', 'value1': 1, 'key2': 'c', 'value2': 30}, {'key1': 'a', 'value1': 1, 'key2': 'd', 'value2': 4}, {'key1': 'b', 'value1': 2, 'key2': 'c', 'value2': 30}, {'key1': 'b', 'value1': 2, 'key2': 'd', 'value2': 4}, {'key1': 'c', 'value1': 3, 'key2': 'c', 'value2': 30}, {'key1': 'c', 'value1': 3, 'key2': 'd', 'value2': 4}]


### Series

In [97]:
import pandas as pd

# Sample series
s1 = pd.Series([1, 2, 3, 4])
s2 = pd.Series([3, 4, 5, 6])
print('s1:',s1)
print('s2:',s2)

# Convert series to sets for set operations
set1 = set(s1)
set2 = set(s2)

# Union
union_result = pd.Series(list(set1 | set2))

# Intersection
intersection_result = pd.Series(list(set1 & set2))

# Difference (elements in s1 but not in s2)
difference_s1 = pd.Series(list(set1 - set2))

# Difference (elements in s2 but not in s1)
difference_s2 = pd.Series(list(set2 - set1))

# Subset (check if s1 is a subset of s2)
is_subset = set1 <= set2

# Superset (check if s1 is a superset of s2)
is_superset = set1 >= set2

# Complement (relative to the union of the two series)
universal_set = set1 | set2
complement_s1 = pd.Series(list(universal_set - set1))
complement_s2 = pd.Series(list(universal_set - set2))

# Symmetric Difference
symmetric_diff = pd.Series(list(set1 ^ set2))

# Cartesian Product
cartesian_product = pd.DataFrame([(a, b) for a in s1 for b in s2], columns=['s1', 's2'])

print('union:\n',union_result)
print('Intersection:\n',intersection_result)
print('Difference s1:\n',difference_s1)
print('Difference s2:\n',difference_s2)
print('Subset:\n',is_subset)
print('Superset:\n',is_superset)
print('Complement s1:\n',complement_s1)
print('complement s2:\n',complement_s2)
print('Symmetric Difference:\n',symmetric_diff)
print('Cartesian Product:\n',cartesian_product)


#union_result, intersection_result, difference_s1, difference_s2, is_subset, is_superset, complement_s1, complement_s2, symmetric_diff, cartesian_product


s1: 0    1
1    2
2    3
3    4
dtype: int64
s2: 0    3
1    4
2    5
3    6
dtype: int64
union:
 0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64
Intersection:
 0    3
1    4
dtype: int64
Difference s1:
 0    1
1    2
dtype: int64
Difference s2:
 0    5
1    6
dtype: int64
Subset:
 False
Superset:
 False
Complement s1:
 0    5
1    6
dtype: int64
complement s2:
 0    1
1    2
dtype: int64
Symmetric Difference:
 0    1
1    2
2    5
3    6
dtype: int64
Cartesian Product:
     s1  s2
0    1   3
1    1   4
2    1   5
3    1   6
4    2   3
5    2   4
6    2   5
7    2   6
8    3   3
9    3   4
10   3   5
11   3   6
12   4   3
13   4   4
14   4   5
15   4   6


##  Iterating Index and Values in Various Data Structures
### List

In [104]:
my_list=[1,2,3,4]

for idx, value in enumerate(my_list):
    pass

### np.array

In [105]:
import numpy as np
arr1 = np.array([1, 2, 3, 4])
for idx, value in enumerate(arr1):
    pass


### Dataframe

In [108]:
import pandas as pd
# Sample dataframes
df1 = pd.DataFrame({
    'A': [1, 2, 3],
    'B': ['a', 'b', 'c']
})
# Iterating over rows
for idx, row in df1.iterrows():
    pass
# Iterating over columns
for column_name,column_data in df1.items():
    pass

### Dictionary

In [110]:
# Redefining the sample dictionaries
dict1 = {'a': 1, 'b': 2, 'c': 3}
# Iterating over keys:
for key in dict1:
    pass
# Iterating over key-value pairs:
for key, value in my_dict.items():
    pass

### Series

In [113]:
import pandas as pd

# Sample series
s1 = pd.Series([1, 2, 3, 4])
for idx, value in s1.items():
    pass