In [1]:
!pip install numpy



In [2]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [5]:
import numpy as np
import pandas as pd

# Task 1: Three Different Methods for Creating Identical 2D Arrays in NumPy

# Method 1: Using np.array()
array1 = np.array([[1, 2, 3], [4, 5, 6]])

# Method 2: Using np.full()
array2 = np.full((2, 3), [1, 2, 3])

# Method 3: Using np.ones() and multiplication
array3 = np.ones((2, 3)) * [1, 2, 3]

print("Array 1:\n", array1)
print("Array 2:\n", array2)
print("Array 3:\n", array3)

# Task 2: Generate an array of 100 evenly spaced numbers between 1 and 10 and reshape into a 2D array

array_1d = np.linspace(1, 10, 100)
array_2d = array_1d.reshape((10, 10))

print("\n2D Array:\n", array_2d)

# Task 3: Explain the terms

# i. np.array, np.asarray, np.asanyarray
"""
np.array: Always copies the input data to create an array.
np.asarray: Converts the input to an array without copying if it's already an array.
np.asanyarray: Similar to np.asarray, but it preserves the subclass type if the input is an array subclass (e.g., matrix).
"""

# ii. Deep copy and shallow copy
"""
Deep copy: Creates a new array object and copies the elements recursively. Changes in the copy do not affect the original.
Shallow copy: Creates a new array object but references the same elements. Changes in the copy reflect in the original.
"""

# Task 4: Generate a 3x2 array with random floating-point numbers between 5 and 20, rounded to 2 decimal places

random_array = np.random.uniform(5, 20, (3, 2))
rounded_array = np.round(random_array, 2)

print("\nRounded Array:\n", rounded_array)

# Task 5: Create a NumPy array with random integers and extract even and odd integers

random_int_array = np.random.randint(1, 11, (5, 6))
even_integers = random_int_array[random_int_array % 2 == 0]
odd_integers = random_int_array[random_int_array % 2 != 0]

print("\nRandom Integer Array:\n", random_int_array)
print("Even Integers:\n", even_integers)
print("Odd Integers:\n", odd_integers)

# Task 6: Create a 3D array and perform operations

array_3d = np.random.randint(1, 11, (3, 3, 3))

# a) Find the indices of the maximum values along each depth level (third axis)
max_indices = np.argmax(array_3d, axis=2)

# b) Perform element-wise multiplication of the array by itself
elementwise_mult = array_3d * array_3d

print("\n3D Array:\n", array_3d)
print("Indices of Maximum Values:\n", max_indices)
print("Element-wise Multiplication:\n", elementwise_mult)

# Task 7: Clean and transform the 'Phone' column in the provided dataset

# Assuming people_data DataFrame is already defined
# For demonstration, create a sample DataFrame
people_data = pd.DataFrame({
    'Phone': ['+1-800-1234567', '(123) 456-7890', '555-1234'],
    'Last Name': ['Doe', 'Smith', 'Johnson'],
    'Gender': ['Female', 'Male', 'Female'],
    'Email': ['doe@example.com', 'smith@example.com', 'johnson@example.com'],
    'Salary': [70000, 80000, 90000]
})

# Cleaning Phone column to keep only numeric characters and convert to numeric type
people_data['Phone'] = people_data['Phone'].str.replace(r'\D', '', regex=True).astype(float)

# Display the table attributes and data types of each column
print("\nDataset Info After Cleaning 'Phone' Column:\n")
print(people_data.info())

# Task 8: Perform tasks using the provided dataset

# a) Read the 'data.csv' file using pandas, skipping the first 50 rows
# Assuming people_data is the dataset to work on (here, it's already created)
filtered_data = people_data.iloc[50:]

# b) Only read the columns: 'Last Name', 'Gender', 'Email', 'Phone', and 'Salary'
filtered_columns = filtered_data[['Last Name', 'Gender', 'Email', 'Phone', 'Salary']]

# c) Display the first 10 rows of the filtered dataset
print("\nFirst 10 Rows of Filtered Data:\n", filtered_columns.head(10))

# d) Extract the ‘Salary’ column as a Series and display its last 5 values
salary_series = filtered_columns['Salary']
print("\nLast 5 Values of Salary Column:\n", salary_series.tail(5))

# Task 9: Filter and select rows from the dataset

filtered_rows = people_data[(people_data['Last Name'].str.contains('Duke')) &
                            (people_data['Gender'] == 'Female') &
                            (people_data['Salary'] < 85000)]

print("\nFiltered Rows:\n", filtered_rows)

# Task 10: Create a 7x5 DataFrame in Pandas using a series generated from 35 random integers between 1 to 6

random_integers = np.random.randint(1, 7, 35)
dataframe_7x5 = pd.DataFrame(random_integers.reshape(7, 5))

print("\n7x5 DataFrame:\n", dataframe_7x5)

Array 1:
 [[1 2 3]
 [4 5 6]]
Array 2:
 [[1 2 3]
 [1 2 3]]
Array 3:
 [[1. 2. 3.]
 [1. 2. 3.]]

2D Array:
 [[ 1.          1.09090909  1.18181818  1.27272727  1.36363636  1.45454545
   1.54545455  1.63636364  1.72727273  1.81818182]
 [ 1.90909091  2.          2.09090909  2.18181818  2.27272727  2.36363636
   2.45454545  2.54545455  2.63636364  2.72727273]
 [ 2.81818182  2.90909091  3.          3.09090909  3.18181818  3.27272727
   3.36363636  3.45454545  3.54545455  3.63636364]
 [ 3.72727273  3.81818182  3.90909091  4.          4.09090909  4.18181818
   4.27272727  4.36363636  4.45454545  4.54545455]
 [ 4.63636364  4.72727273  4.81818182  4.90909091  5.          5.09090909
   5.18181818  5.27272727  5.36363636  5.45454545]
 [ 5.54545455  5.63636364  5.72727273  5.81818182  5.90909091  6.
   6.09090909  6.18181818  6.27272727  6.36363636]
 [ 6.45454545  6.54545455  6.63636364  6.72727273  6.81818182  6.90909091
   7.          7.09090909  7.18181818  7.27272727]
 [ 7.36363636  7.45454545  7

In [6]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from bokeh.plotting import figure, show, output_notebook
from bokeh.io import output_file
from bokeh.models import HoverTool
import plotly.graph_objs as go
import plotly.express as px



# Ensure Bokeh output is displayed in the notebook
output_notebook()

# Task 11: Create two Series and a DataFrame
# a) First Series with random numbers between 10 and 50
series1 = pd.Series(np.random.randint(10, 51, size=50))

# b) Second Series with random numbers between 100 and 1000
series2 = pd.Series(np.random.randint(100, 1001, size=50))

# c) Create DataFrame with joined Series and rename columns
df = pd.DataFrame({'col1': series1, 'col2': series2})

# Task 12: Operations on the people dataset

# Sample DataFrame creation for demonstration
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Email': ['alice@example.com', 'bob@example.com', 'charlie@example.com', 'david@example.com'],
    'Phone': ['123-4567', '234-5678', '345-6789', '456-7890'],
    'Date of birth': ['1990-01-01', '1985-05-23', '1978-11-12', '2000-07-15'],
    'Address': ['123 Apple St', '456 Orange Ave', '789 Banana Blvd', '101 Grape Ln']
}
people = pd.DataFrame(data)

# a) Delete the 'Email', 'Phone', and 'Date of birth' columns
people = people.drop(columns=['Email', 'Phone', 'Date of birth'])

# b) Delete rows with missing values
people = people.dropna()

# c) Print the final output
print("People DataFrame after deletions:")
print(people)

# Task 13: Scatter Plot using Matplotlib and NumPy
# a) Create two random NumPy arrays x and y
x = np.random.rand(100)
y = np.random.rand(100)

# b) Create scatter plot
plt.scatter(x, y, color='red', marker='o', label='Scatter Points')

# c) Add a horizontal line at y = 0.5
plt.axhline(y=0.5, color='blue', linestyle='--', label='y = 0.5')

# d) Add a vertical line at x = 0.5
plt.axvline(x=0.5, color='green', linestyle=':', label='x = 0.5')

# e) Label the axes and set the title
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Advanced Scatter Plot of Random Values')

# f) Display legend
plt.legend()
plt.show()

# Task 14: Time-series Data Plot
# Create a time-series DataFrame
dates = pd.date_range(start='2023-01-01', periods=100)
temperature = np.random.uniform(20, 30, size=100)
humidity = np.random.uniform(40, 60, size=100)
time_series_df = pd.DataFrame({'Date': dates, 'Temperature': temperature, 'Humidity': humidity})

# a) Plot Temperature and Humidity on the same plot
fig, ax1 = plt.subplots()

ax1.set_xlabel('Date')
ax1.set_ylabel('Temperature', color='tab:red')
ax1.plot(time_series_df['Date'], time_series_df['Temperature'], color='tab:red')

ax2 = ax1.twinx()
ax2.set_ylabel('Humidity', color='tab:blue')
ax2.plot(time_series_df['Date'], time_series_df['Humidity'], color='tab:blue')

# b) Set title
plt.title('Temperature and Humidity Over Time')
plt.show()

# Task 15: Histogram with PDF Overlay
# a) Generate data
data = np.random.randn(1000)

# b) Plot histogram with 30 bins
count, bins, ignored = plt.hist(data, 30, density=True, alpha=0.6, color='g')

# c) Overlay PDF
pdf = (1 / (np.sqrt(2 * np.pi))) * np.exp(-0.5 * bins**2)
plt.plot(bins, pdf, linewidth=2, color='r')

# d) Label the axes and set the title
plt.xlabel('Value')
plt.ylabel('Frequency/Probability')
plt.title('Histogram with PDF Overlay')
plt.show()

# Task 17: Seaborn Scatter Plot
# Create two random arrays
x = np.random.rand(100)
y = np.random.rand(100)

# Determine quadrants
quadrant = np.where((x > 0.5) & (y > 0.5), 'Q1', 
                    np.where((x <= 0.5) & (y > 0.5), 'Q2',
                             np.where((x <= 0.5) & (y <= 0.5), 'Q3', 'Q4')))

# Create scatter plot
sns.scatterplot(x=x, y=y, hue=quadrant)
plt.axhline(0.5, color='gray', linestyle='--')
plt.axvline(0.5, color='gray', linestyle='--')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Quadrant-wise Scatter Plot')
plt.legend()
plt.show()

# Task 18: Bokeh Sine Wave Function
# Generate sine wave data
x = np.linspace(0, 4*np.pi, 100)
y = np.sin(x)

# Create a Bokeh plot
p = figure(title='Sine Wave Function', x_axis_label='X-axis', y_axis_label='Y-axis')
p.line(x, y, legend_label="Sine Wave", line_width=2)

# Add grid lines
p.xgrid.grid_line_color = 'gray'
p.ygrid.grid_line_color = 'gray'

show(p)

# Task 19: Bokeh Bar Chart with Categorical Data
# Generate random categorical data
categories = ['A', 'B', 'C', 'D', 'E']
values = np.random.randint(10, 100, size=5)

# Create Bokeh bar chart
p = figure(x_range=categories, title='Random Categorical Bar Chart', toolbar_location=None, tools='')

p.vbar(x=categories, top=values, width=0.9, color=['navy', 'green', 'red', 'orange', 'purple'])

# Add hover tooltips
hover = HoverTool()
hover.tooltips = [('Category', '@x'), ('Value', '@top')]
p.add_tools(hover)

# Label axes
p.xaxis.axis_label = 'Category'
p.yaxis.axis_label = 'Value'

show(p)

# Task 20: Plotly Simple Line Plot
# Generate random dataset
random_data = np.random.rand(100)

# Create a basic line plot
fig = go.Figure(go.Scatter(x=list(range(100)), y=random_data, mode='lines'))

# Set the title and labels
fig.update_layout(title='Simple Line Plot', xaxis_title='X-axis', yaxis_title='Y-axis')
fig.show()

# Task 21: Plotly Interactive Pie Chart
# Generate random data
labels = ['Category A', 'Category B', 'Category C', 'Category D']
values = np.random.randint(10, 50, size=4)

# Create an interactive pie chart
fig = px.pie(values=values, names=labels, title='Interactive Pie Chart')

# Add percentages
fig.update_traces(textinfo='percent+label')
fig.show()

ModuleNotFoundError: No module named 'plotly'