In [3]:
import numpy as np

In [4]:
# 1. Creating arrays
# np.array() creates an array from a list or tuple
arr = np.array([1, 2, 3, 4, 5])
print("Array:", arr)

Array: [1 2 3 4 5]


In [5]:
# 2. Creating arrays with specific values
# np.zeros() creates an array filled with zeros
zeros = np.zeros((3, 3))
print("Zeros:\n", zeros)

Zeros:
 [[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [6]:
# np.ones() creates an array filled with ones
ones = np.ones((2, 4))
print("Ones:\n", ones)

Ones:
 [[1. 1. 1. 1.]
 [1. 1. 1. 1.]]


In [7]:
# np.full() creates an array filled with a specified value
full = np.full((2, 2), 7)
print("Full:\n", full)


Full:
 [[7 7]
 [7 7]]


In [8]:
# np.eye() creates an identity matrix
identity = np.eye(3)
print("Identity Matrix:\n", identity)

Identity Matrix:
 [[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [9]:
# np.arange() creates an array with a range of values
arange = np.arange(0, 10, 2)
print("Arange:", arange)

Arange: [0 2 4 6 8]


In [56]:
# np.linspace() creates an array with evenly spaced values over a specified interval
linspace = np.linspace(0, 1, 10)
print("Linspace:", linspace)

Linspace: [0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556
 0.66666667 0.77777778 0.88888889 1.        ]


In [53]:
# 3. Reshaping arrays
# np.reshape() reshapes an array without changing its data
reshaped = arr.reshape((1, 5))
print("Reshaped:\n", reshaped)

Reshaped:
 [[1 2 3 4 5]]


In [12]:
# 4. Basic operations
# np.add(), np.subtract(), np.multiply(), np.divide() perform element-wise operations
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print("Addition:", np.add(a, b))
print("Subtraction:", np.subtract(a, b))
print("Multiplication:", np.multiply(a, b))
print("Division:", np.divide(a, b))

Addition: [5 7 9]
Subtraction: [-3 -3 -3]
Multiplication: [ 4 10 18]
Division: [0.25 0.4  0.5 ]


In [13]:
# np.dot() performs dot product of two arrays
dot_product = np.dot(a, b)
print("Dot Product:", dot_product)


Dot Product: 32


In [14]:
# 5. Statistical operations
# np.mean(), np.median(), np.std() compute mean, median, and standard deviation
print("Mean:", np.mean(a))
print("Median:", np.median(a))
print("Standard Deviation:", np.std(a))

Mean: 2.0
Median: 2.0
Standard Deviation: 0.816496580927726


In [21]:
# 6. Indexing and slicing
# Accessing elements using indices
print("Element at index 1:", a[1])
# Slicing arrays
print("Sliced array:", a[1:3])

Element at index 1: 2
Sliced array: [2 3]


In [16]:
# 7. Conditional selection
# np.where() returns elements chosen from x or y depending on condition
condition = a > 2
print("Condition:", condition)
print("Elements greater than 2:", a[condition])
print("Elements with np.where:", np.where(a > 2, a, -1))

Condition: [False False  True]
Elements greater than 2: [3]
Elements with np.where: [-1 -1  3]


In [22]:
# 8. Random functions
# np.random.random() generates an array of random floats between 0 and 1
random_floats = np.random.random((2, 2))
print("Random Floats:\n", random_floats)

Random Floats:
 [[0.40525292 0.2671577 ]
 [0.52300607 0.3492583 ]]


In [23]:
# np.random.randint() generates an array of random integers within a specified range
random_ints = np.random.randint(0, 10, (2, 3))
print("Random Integers:\n", random_ints)

Random Integers:
 [[1 0 3]
 [7 9 2]]


In [24]:
# 9. Aggregation functions
# np.sum(), np.prod(), np.cumsum() compute sum, product, and cumulative sum
print("Sum:", np.sum(a))
print("Product:", np.prod(a))
print("Cumulative Sum:", np.cumsum(a))

Sum: 6
Product: 6
Cumulative Sum: [1 3 6]


In [25]:
# 10. Concatenation and stacking
# np.concatenate() joins a sequence of arrays along an existing axis
concatenated = np.concatenate((a, b))
print("Concatenated:", concatenated)

Concatenated: [1 2 3 4 5 6]


In [26]:
# np.vstack() stacks arrays in sequence vertically (row-wise)
vstacked = np.vstack((a, b))
print("Vstacked:\n", vstacked)

Vstacked:
 [[1 2 3]
 [4 5 6]]


In [27]:
# np.hstack() stacks arrays in sequence horizontally (column-wise)
hstacked = np.hstack((a, b))
print("Hstacked:", hstacked)

Hstacked: [1 2 3 4 5 6]


In [28]:
import pandas as pd

In [29]:
# 1. Creating DataFrames
# Creating a DataFrame from a dictionary
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David'],
        'Age': [25, 30, 35, 40],
        'City': ['New York', 'Los Angeles', 'Chicago', 'Houston']}
df = pd.DataFrame(data)
print("DataFrame:\n", df)

DataFrame:
       Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
3    David   40      Houston


In [30]:
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago
3,David,40,Houston


In [57]:
# 2. Basic DataFrame Operations
# Viewing the first few rows of the DataFrame
df.head(2)

Unnamed: 0,Name,Age,Salary
0,Alice,26,50000
1,Bob,30,60000


In [59]:
# Viewing the last few rows of the DataFrame
df.tail(2)

Unnamed: 0,Name,Age,Salary
2,Charlie,35,70000
3,David,40,80000


In [37]:
# Getting basic information about the DataFrame
print("Info:\n", df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   City    4 non-null      object
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes
Info:
 None


In [38]:
df.describe()

Unnamed: 0,Age
count,4.0
mean,32.5
std,6.454972
min,25.0
25%,28.75
50%,32.5
75%,36.25
max,40.0


In [60]:
df[['Name', 'Salary']]

Unnamed: 0,Name,Salary
0,Alice,50000
1,Bob,60000
2,Charlie,70000
3,David,80000


In [42]:
df.iloc[0]

Name       Alice
Age           25
City    New York
Name: 0, dtype: object

In [63]:
df[df['Age'] > 20]

Unnamed: 0,Name,Age,Salary
0,Alice,26,50000
1,Bob,30,60000
2,Charlie,35,70000
3,David,40,80000


In [44]:
df['Salary'] = [50000, 60000, 70000, 80000]
df

Unnamed: 0,Name,Age,City,Salary
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000


In [45]:
df.loc[0, 'Age'] = 26
df

Unnamed: 0,Name,Age,City,Salary
0,Alice,26,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000
3,David,40,Houston,80000


In [46]:
df = df.drop(columns=['City'])

In [47]:
df

Unnamed: 0,Name,Age,Salary
0,Alice,26,50000
1,Bob,30,60000
2,Charlie,35,70000
3,David,40,80000


In [None]:
# 5. Handling Missing Data
# Creating a DataFrame with missing values
data_with_nan = {'Name': ['Alice', 'Bob', None, 'David'],
                 'Age': [25, None, 35, 40],
                 'City': ['New York', 'Los Angeles', 'Chicago', None]}
df_nan = pd.DataFrame(data_with_nan)
print("DataFrame with NaN:\n", df_nan)

# Checking for missing values
print("Missing values:\n", df_nan.isnull())

# Dropping rows with missing values
print("Drop rows with NaN:\n", df_nan.dropna())

# Filling missing values
print("Fill NaN with 'Unknown':\n", df_nan.fillna('Unknown'))

# 6. Grouping and Aggregating Data
# Grouping by a column and calculating the mean
grouped = df.groupby('Name').mean()
print("Grouped by Name (mean):\n", grouped)

# Aggregating data
aggregated = df.groupby('Name').agg({'Age': 'mean', 'Salary': 'sum'})
print("Aggregated:\n", aggregated)

# 7. Merging and Joining DataFrames
# Creating another DataFrame
data2 = {'Name': ['Alice', 'Bob', 'Charlie', 'David'],
         'Department': ['HR', 'Finance', 'Engineering', 'Marketing']}
df2 = pd.DataFrame(data2)

# Merging DataFrames
merged = pd.merge(df, df2, on='Name')
print("Merged DataFrame:\n", merged)

# 8. Reshaping DataFrames
# Pivoting DataFrames
pivoted = merged.pivot(index='Name', columns='Department', values='Salary')
print("Pivoted DataFrame:\n", pivoted)

# Melting DataFrames
melted = pd.melt(df, id_vars=['Name'], value_vars=['Age', 'Salary'])
print("Melted DataFrame:\n", melted)

# 9. Saving DataFrames
# Saving to a CSV file
# df.to_csv('output.csv', index=False)

# Saving to an Excel file
# df.to_excel('output.xlsx', index=False)