# Data Manipulation and Analysis
###### **NumPy**: Numerical computing library for arrays and matrices.
###### **pandas**: Data manipulation and analysis library with DataFrame objects.
###### **SciPy**: Scientific computing library built on top of NumPy.
###### **Matplotlib**: Plotting and data visualization library.
###### **Seaborn**: Statistical data visualization based on Matplotlib.
###### **Plotly**: Interactive and web-based plotting library.

# In [10]:
# dir(np)

# In [18]:
import numpy as np

# Element-wise arithmetic: equal-shaped arrays are added position by position.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
c = np.add(a, b)

# Basic slicing: the stop index is exclusive, so this selects indices 2, 3, 4.
arr = np.arange(6)
subset = arr[2:5]

# Descriptive statistics computed through the array's own methods.
arr = np.array([10, 20, 30, 40, 50])
mean = arr.mean()
std_dev = arr.std()

# Matrix product of two 2x2 matrices (not an element-wise product).
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
product = A @ B

import matplotlib.pyplot as plt
# image = plt.imread("image.jpg")
# inverted_image = 255 - image  # Invert image colors


# In [13]:
import pandas as pd

###### 1. Loading and Reading Data
# Load the CSV into a DataFrame. The column names used in the sections below
# ('column_name', 'age', 'gender', 'column1', 'column2') are placeholders;
# NOTE(review): they must exist in data.csv for this script to run.
df = pd.read_csv('data.csv')

###### 2. Data Exploration
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.describe())

###### 3. Data Cleaning
# Both calls return new frames and leave `df` untouched.
cleaned_df = df.dropna()
filled_df = df.fillna(0)

###### 4. Data Selection and Indexing
selected_column = df['column_name']
# Boolean-mask filtering: keep only the rows where the condition holds.
filtered_rows = df[df['age'] > 25]

###### 5. Grouping and Aggregation
group_by_gender = df.groupby('gender')
mean_age_by_gender = group_by_gender['age'].mean()

###### 6. Data Transformation
# Column-wise arithmetic adds a derived column to `df` in place.
df['new_column'] = df['column1'] + df['column2']
# Row-wise labelling: ages below 30 become 'young', everything else 'old'.
df['category'] = df['age'].apply(lambda x: 'young' if x < 30 else 'old')

###### 7. Merging and Joining
# Example inputs for the merge: two small frames that share a key column.
df1 = pd.DataFrame({'common_column': [1, 2, 3], 'left_value': ['a', 'b', 'c']})
df2 = pd.DataFrame({'common_column': [2, 3, 4], 'right_value': ['x', 'y', 'z']})
# Inner join (the pd.merge default): only keys present in both frames are kept.
merged_df = pd.merge(df1, df2, on='common_column')

###### 8. Time Series Analysis
# Build one daily timestamp per row of `df`. Sizing the range from the frame
# avoids the length-mismatch ValueError that a fixed `periods` raises on
# assignment whenever `df` does not have exactly that many rows.
time_series = pd.date_range(start='2023-01-01', periods=len(df), freq='D')
df['date_column'] = time_series

###### 9. Reshaping Data
# NOTE(review): 'date', 'category', 'value', 'id', 'name', 'col1' and 'col2'
# are placeholder column names; confirm they exist in the loaded data.
# Long -> wide: one row per 'date', one column per distinct 'category'.
pivoted = df.pivot(index='date', columns='category', values='value')
# Wide -> long: 'col1'/'col2' are stacked into variable/value pairs.
melted = df.melt(id_vars=['id', 'name'], value_vars=['col1', 'col2'])

###### 10. Data Visualization (using Matplotlib)
import matplotlib.pyplot as plt
# DataFrame.plot draws through Matplotlib; plt.show() displays the figure.
df.plot(x='date', y='value', kind='line')
plt.show()



### scipy

import numpy as np  
from scipy import stats, optimize, interpolate, signal, linalg, integrate, spatial, cluster  

###### 1. Statistical Functions
# Summary statistics of a small sample, via the array's own methods where available.
data = np.array([1, 2, 3, 4, 5])
mean = data.mean()
median = np.median(data)
std_dev = data.std()

###### 2. Optimization
def objective(x):
    """Return x squared plus ten times the sine of x."""
    sine_term = 10 * np.sin(x)
    return x**2 + sine_term
# Local minimisation started from x0 = 0; `result` is the optimizer's result object.
result = optimize.minimize(fun=objective, x0=0)

###### 3. Interpolation
x = np.array([1, 2, 3, 4, 5])
y = np.array([10, 8, 6, 4, 2])
# `f` is a callable that linearly interpolates between the (x, y) samples.
f = interpolate.interp1d(x=x, y=y, kind='linear')

###### 4. Signal Processing
signal_data = np.random.random(size=100)
# Savitzky-Golay smoothing: window of 5 samples, polynomial order 2.
filtered_data = signal.savgol_filter(signal_data, 5, 2)

###### 5. Linear Algebra
A = np.array([[1, 2], [3, 4]])
b = np.array([5, 6])
# Solve A @ x = b; note this rebinds `x` from the interpolation section.
x = linalg.solve(a=A, b=b)

###### 6. Integration
# quad returns (value, error estimate); `result` is rebound to the integral
# of t**2 over [0, 1] and the error estimate is discarded.
result, _ = integrate.quad(lambda t: t**2, a=0, b=1)

###### 7. Spatial Operations
points = np.random.random((10, 2))
tree = spatial.cKDTree(points)
# Distance to, and index within `points` of, the nearest neighbour of the query point.
nearest_distance, nearest_index = tree.query([0.5, 0.5])

###### 8. Clustering
data_points = np.random.random((20, 2))
# SciPy has no `cluster.KMeans` (that class belongs to scikit-learn); SciPy's
# k-means lives in `scipy.cluster.vq`. `kmeans2` returns (centroids, labels),
# where labels[i] is the cluster index assigned to data_points[i].
centroids, labels = cluster.vq.kmeans2(data_points, 3, minit='++')

###### 9. Fourier Transforms
time = np.linspace(0, 1, 1000)
# Stored as `sine_wave` rather than `signal`, so the imported `scipy.signal`
# module is not rebound to an array before `signal.periodogram` is called.
sine_wave = np.sin(2 * np.pi * 5 * time)
# Sampling frequency derived from the time grid, so `frequency` comes out in Hz.
sample_rate = 1.0 / (time[1] - time[0])
# periodogram returns (sample frequencies, power spectral density).
frequency, amplitude = signal.periodogram(sine_wave, fs=sample_rate)

###### 10. Statistical Distributions
# Frozen standard normal distribution; draw 100 random variates from it.
rv = stats.norm(loc=0, scale=1)
random_samples = rv.rvs(size=100)



### matplotlib

import numpy as np  
import matplotlib.pyplot as plt  

##### 1. Basic Line Plot
# One sine curve sampled on 100 evenly spaced points over [0, 10].
x = np.linspace(0, 10, num=100)
y = np.sin(x)
plt.plot(x, y)
plt.title('Sine Function')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.show()

##### 2. Scatter Plot
# 50 random points; x is drawn before y.
x, y = np.random.rand(50), np.random.rand(50)
plt.scatter(x, y, marker='o', c='blue')
plt.title('Scatter Plot')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.show()

##### 3. Bar Plot
# One bar per category label, with heights taken from `values`.
categories = ['A', 'B', 'C']
values = [20, 35, 50]
plt.bar(x=categories, height=values)
plt.title('Bar Plot')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.show()

##### 4. Histogram
# 1000 draws from a normal distribution (mean 0, std 1), binned into 20 bars.
data = np.random.normal(loc=0, scale=1, size=1000)
plt.hist(x=data, bins=20)
plt.title('Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.show()

##### 5. Pie Chart
sizes = [30, 40, 20, 10]
labels = ['A', 'B', 'C', 'D']
# autopct prints each wedge's share as a percentage with one decimal place.
plt.pie(x=sizes, autopct='%1.1f%%', labels=labels)
plt.title('Pie Chart')
plt.show()

##### 6. Box Plot
# `data` is rebound here to a smaller sample of 100 normal draws.
data = np.random.normal(loc=0, scale=1, size=100)
plt.boxplot(x=data)
plt.title('Box Plot')
plt.ylabel('Value')
plt.show()

##### 7. Subplots
# 2x2 grid of axes; each panel reuses the `x`, `y`, `categories`, `values`
# and `data` variables left over from the sections above.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(8, 8))
(line_ax, scatter_ax), (bar_ax, hist_ax) = axs
line_ax.plot(x, y)
scatter_ax.scatter(x, y)
bar_ax.bar(categories, values)
hist_ax.hist(data, bins=20)
plt.show()

##### 8. Annotations and Text
x = np.linspace(0, 10, num=100)
y = np.sin(x)
plt.plot(x, y)
# Arrow from the label position (xytext) to the annotated point (xy).
plt.annotate(
    'Peak',
    xy=(np.pi/2, 1),
    xytext=(3, 1.5),
    arrowprops={'facecolor': 'black', 'shrink': 0.05},
)
# Plain text placed at data coordinates (6, -0.5), without an arrow.
plt.text(x=6, y=-0.5, s='Minimum', color='red', fontsize=12)
plt.show()

##### 9. Adding Legends
x = np.linspace(0, 10, num=100)
y1, y2 = np.sin(x), np.cos(x)
# The `label` of each line is what plt.legend() displays.
plt.plot(x, y1, label='Sine')
plt.plot(x, y2, label='Cosine')
plt.legend()
plt.title('Sine and Cosine Functions')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.show()

##### 10. Customizing Styles
# Switch to the 'ggplot' style sheet before drawing.
plt.style.use('ggplot')
x = np.linspace(0, 10, num=100)
y = np.sin(x)
plt.plot(x, y)
plt.title('Sine Function')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.show()


### Plotly 

import numpy as np  
import plotly.express as px  
import plotly.graph_objects as go  

#### 1. Basic Line Plot
x = np.linspace(0, 10, num=100)
y = np.sin(x)
# Build the figure from one line trace and set its title in a single chain
# (update_layout returns the figure it was called on).
fig = go.Figure(
    data=go.Scatter(mode='lines', x=x, y=y),
).update_layout(title='Sine Function')
fig.show()

#### 2. Scatter Plot
# 50 random points; x is drawn before y.
x, y = np.random.rand(50), np.random.rand(50)
fig = px.scatter(title='Scatter Plot', x=x, y=y)
fig.show()

#### 3. Bar Plot
categories = ['A', 'B', 'C']
values = [20, 35, 50]
fig = px.bar(title='Bar Plot', x=categories, y=values)
fig.show()

#### 4. Histogram
# 1000 draws from a normal distribution (mean 0, std 1).
data = np.random.normal(loc=0, scale=1, size=1000)
fig = px.histogram(title='Histogram', x=data)
fig.show()

#### 5. Pie Chart
sizes = [30, 40, 20, 10]
labels = ['A', 'B', 'C', 'D']
# `names` labels the wedges; `values` sets their sizes.
fig = px.pie(title='Pie Chart', names=labels, values=sizes)
fig.show()

#### 6. Box Plot
# `data` is rebound here to a smaller sample of 100 normal draws.
data = np.random.normal(loc=0, scale=1, size=100)
fig = px.box(title='Box Plot', y=data)
fig.show()

#### 7. Subplots
# make_subplots lives in plotly.subplots and is not exposed by plotly.express
# or plotly.graph_objects, so it has to be imported explicitly.
from plotly.subplots import make_subplots

# 2x2 grid; row/col are 1-based. Each panel reuses the `x`, `y`, `categories`,
# `values` and `data` variables left over from the sections above.
fig = make_subplots(rows=2, cols=2)
fig.add_trace(go.Scatter(x=x, y=y, mode='lines'), row=1, col=1)
# A plotly.express figure keeps its traces in `.data`; the first one is
# lifted out and added to the grid.
fig.add_trace(px.scatter(x=x, y=y).data[0], row=1, col=2)
fig.add_trace(px.bar(x=categories, y=values).data[0], row=2, col=1)
fig.add_trace(px.histogram(x=data).data[0], row=2, col=2)
fig.show()

#### 8. Annotations and Text
# Recreate the sine samples here: at this point `x`/`y` still hold the random
# scatter data from section 2, which would not match the 'Sine Function'
# title or the 'Peak' annotation at (pi/2, 1).
x = np.linspace(0, 10, 100)
y = np.sin(x)
fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines'))
# Arrow annotation pointing at the first maximum of sin(x).
fig.add_annotation(text='Peak', x=np.pi/2, y=1, arrowhead=2, arrowcolor='black')
# Plain text label: showarrow=False stops Plotly drawing its default arrow
# to a point that is not on the curve.
fig.add_annotation(text='Minimum', x=6, y=-0.5, showarrow=False, font=dict(color='red'))
fig.update_layout(title='Sine Function')
fig.show()

#### 9. Adding Legends
fig = go.Figure()
# Each trace's `name` is the text shown for it in the legend.
fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name='Sine'))
fig.add_trace(go.Scatter(x=x, y=-y, mode='lines', name='Negative Sine'))
fig.update_layout(title='Sine Functions', xaxis_title='X-axis', yaxis_title='Y-axis')
fig.show()

#### 10. 3D Surface Plot
# 50x50 grid over [-5, 5] x [-5, 5].
x = np.linspace(-5, 5, num=50)
y = np.linspace(-5, 5, num=50)
X, Y = np.meshgrid(x, y)
# Height is the sine of each grid point's distance from the origin.
Z = np.sin(np.sqrt(X**2 + Y**2))
fig = go.Figure(
    data=[go.Surface(x=x, y=y, z=Z)],
).update_layout(title='3D Surface Plot')
fig.show()


## seaborn

import numpy as np  
import seaborn as sns  
import matplotlib.pyplot as plt  

#### Set Seaborn style
# Apply the 'whitegrid' style to every plot that follows.
sns.set(style='whitegrid')

#### 1. Basic Line Plot
x = np.linspace(0, 10, num=100)
y = np.sin(x)
sns.lineplot(y=y, x=x)
plt.title('Sine Function')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.show()

#### 2. Scatter Plot
# 50 random points; x is drawn before y.
x, y = np.random.rand(50), np.random.rand(50)
sns.scatterplot(y=y, x=x)
plt.title('Scatter Plot')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.show()

#### 3. Bar Plot
categories = ['A', 'B', 'C']
values = [20, 35, 50]
sns.barplot(y=values, x=categories)
plt.title('Bar Plot')
plt.xlabel('Categories')
plt.ylabel('Values')
plt.show()

#### 4. Histogram
# 1000 draws from a normal distribution (mean 0, std 1).
data = np.random.normal(loc=0, scale=1, size=1000)
sns.histplot(data=data)
plt.title('Histogram')
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.show()

#### 5. Pie Chart
# This section draws the pie with Matplotlib's plt.pie rather than a Seaborn call.
sizes = [30, 40, 20, 10]
labels = ['A', 'B', 'C', 'D']
plt.pie(x=sizes, autopct='%1.1f%%', labels=labels)
plt.title('Pie Chart')
plt.show()

#### 6. Box Plot
# `data` is rebound here to a smaller sample of 100 normal draws.
data = np.random.normal(loc=0, scale=1, size=100)
sns.boxplot(y=data)
plt.title('Box Plot')
plt.ylabel('Value')
plt.show()

#### 7. Subplots (using Matplotlib)
# 2x2 Matplotlib grid; each Seaborn call is directed to one panel through `ax`.
# The panels reuse `x`, `y`, `categories`, `values` and `data` from the sections above.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(8, 8))
(line_ax, scatter_ax), (bar_ax, hist_ax) = axs
sns.lineplot(x=x, y=y, ax=line_ax)
sns.scatterplot(x=x, y=y, ax=scatter_ax)
sns.barplot(x=categories, y=values, ax=bar_ax)
sns.histplot(data, ax=hist_ax)
plt.show()

#### 8. Annotations and Text
# Recreate the sine samples here: at this point `x`/`y` still hold the random
# scatter data from section 2, which would not match the 'Sine Function'
# title or the 'Peak' annotation at (pi/2, 1).
x = np.linspace(0, 10, 100)
y = np.sin(x)
sns.lineplot(x=x, y=y)
# Arrow from the label position (xytext) to the first maximum of sin(x).
plt.annotate('Peak', xy=(np.pi/2, 1), xytext=(3, 1.5),
             arrowprops=dict(facecolor='black', shrink=0.05))
# Plain text placed at data coordinates (6, -0.5), without an arrow.
plt.text(6, -0.5, 'Minimum', fontsize=12, color='red')
plt.title('Sine Function')
plt.show()

#### 9. Adding Legends (using Matplotlib)
# The `label` of each line is what plt.legend() displays.
sns.lineplot(x=x, y=y, label='Sine')
sns.lineplot(x=x, y=-y, label='Negative Sine')
plt.legend()
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Sine Functions')
plt.show()

#### 10. Pair Plot
# load_dataset fetches the example 'iris' table as a DataFrame.
iris = sns.load_dataset('iris')
# Grid of pairwise plots of the dataset's columns, coloured by 'species'.
sns.pairplot(iris, hue='species')
# pairplot draws a whole grid of axes, so plt.title() would only title the
# last subplot. suptitle() titles the figure itself; y > 1 lifts it clear of
# the top row of panels.
plt.suptitle('Pair Plot', y=1.02)
plt.show()
