In [None]:
import matplotlib.pyplot as plt

In [None]:
# IPython magic selecting matplotlib's inline backend for this notebook.
%matplotlib inline

In [None]:
# 2. Basic example

In [None]:
import numpy as np

# Eleven evenly spaced sample points on [0, 5] and their squares; these two
# arrays are the data plotted by the matplotlib examples that follow.
x = np.linspace(start=0, stop=5, num=11)
y = np.square(x)

In [None]:
# Display the sample points.
x

In [None]:
# Display the squared values.
y

In [None]:
# 3. Basic Matplotlib Commands

In [None]:
# Draw y against x with the pyplot state-machine API. The format string 'r'
# selects the colour red. Title and axis labels are then attached to the
# current axes before the figure is shown.
plt.plot(x, y, 'r')
plt.title('String Title Here')
plt.ylabel('Y Axis Title Here')
plt.xlabel('X Axis Title Here')
plt.show()

In [None]:
# 4. Creating Multiplots on Same Canvas

In [None]:
# plt.subplot(nrows, ncols, plot_number) makes one cell of a grid current.
# Two panels side by side: y against x (red dashed line), then x against y
# (green line with star markers).
for panel, (xs, ys, fmt) in enumerate([(x, y, 'r--'), (y, x, 'g*-')], start=1):
    plt.subplot(1, 2, panel)
    plt.plot(xs, ys, fmt)

In [None]:
# 5. Object Oriented Method

In [None]:
# Object-oriented API: create an empty figure, then place one Axes on it.
fig = plt.figure()

# The rectangle is (left, bottom, width, height) in figure fractions.
axes = fig.add_axes((0.1, 0.1, 0.8, 0.8))

# Plot on that Axes and annotate it through its own setters.
axes.plot(x, y, 'b')
axes.set(xlabel='Set X Label', ylabel='Set Y Label')
axes.set_title('Set Title')

In [None]:
# Inset demo: a large main Axes with a smaller Axes drawn on top of it.
fig = plt.figure()
# add_axes takes [left, bottom, width, height] as fractions of the figure.
axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # main axes
axes2 = fig.add_axes([0.2, 0.5, 0.4, 0.3])  # inset axes

# Main axes: y against x. The labels and title name axes1, so the two Axes
# can be told apart in the rendered figure.
axes1.plot(x, y, 'b')
axes1.set_xlabel('X_label_axes1')
axes1.set_ylabel('Y_label_axes1')
axes1.set_title('Axes 1 Title')

# Inset axes: the same data with the roles of x and y swapped.
axes2.plot(y, x, 'r')
axes2.set_xlabel('X_label_axes2')
axes2.set_ylabel('Y_label_axes2')
axes2.set_title('Axes 2 Title');

In [None]:
# 6. subplots()

In [None]:
# plt.subplots() with no arguments returns a figure and a single Axes.
fig, axes = plt.subplots()

# Draw the curve in red, then set both axis labels and the title in one call.
axes.plot(x, y, 'r')
axes.set(xlabel='x', ylabel='y', title='title');

In [None]:
# One row, two columns: `axes` is now an array holding two Axes objects.
fig, axes = plt.subplots(1, 2)

In [None]:
# Inspect what plt.subplots returned for the 1x2 grid.
axes

In [None]:
# Fill every panel of the grid created earlier with the same blue curve.
for ax in axes:
    ax.plot(x, y, 'b')
    ax.set(ylabel='y', title='title')

# The figure was created in a previous cell, so name it here to render it.
fig

In [None]:
# Same two-panel figure, this time with x labels and automatic spacing.
fig, axes = plt.subplots(nrows=1, ncols=2)

for ax in axes:
    ax.plot(x, y, 'g')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title('title')

# tight_layout re-spaces the panels so labels and titles do not overlap.
# It is called on the figure itself. The former bare `fig` line before it was
# a no-op, because only a cell's last expression is displayed.
fig.tight_layout()

In [None]:
# 7. Figure size, aspect ratio and DPI

In [None]:
# figsize is (width, height) in inches; dpi is dots per inch.
fig = plt.figure(figsize=(8,4), dpi=100)


In [None]:
# figsize is forwarded by plt.subplots to the figure: 12 x 3 inches here.
fig, axes = plt.subplots(figsize=(12, 3))

# Draw the curve in red, then set both axis labels and the title in one call.
axes.plot(x, y, 'r')
axes.set(xlabel='x', ylabel='y', title='title');

In [None]:
# Write the current `fig` to disk; the output format follows the extension.
fig.savefig("filename.png")

In [None]:
# Save again at 200 dpi. This writes to the same path as the previous cell,
# so the earlier file is overwritten.
fig.savefig("filename.png", dpi=200)

In [None]:
# 9. Figure titles

In [None]:
# NOTE(review): `ax` is not created in this cell. On a top-to-bottom run it is
# the last Axes bound by an earlier `for ax in axes` loop, so this call
# retitles that panel.
ax.set_title("title");

In [None]:
# 10. Axis labels

In [None]:
# NOTE(review): `ax` is not created in this cell. On a top-to-bottom run it is
# the last Axes bound by an earlier `for ax in axes` loop.
ax.set_xlabel("x")
ax.set_ylabel("y")

In [None]:
# 11. Legends

In [None]:
# One Axes filling the whole figure.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])

# Give each curve a label; legend() collects the labels into the key.
for power in (2, 3):
    ax.plot(x, x**power, label=f"x**{power}")
ax.legend()

In [None]:
# 12. Plot range

In [None]:
# Three panels showing the same two curves under different axis-limit settings.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# Panel 0: default limits. Each curve needs its own x argument. Without the
# second `x`, plot(x, x**2, x**3) draws x**3 against its index rather than
# against x, which would not match the other two panels.
axes[0].plot(x, x**2, x, x**3)
axes[0].set_title("default axes ranges")

# Panel 1: limits fitted tightly around the data.
axes[1].plot(x, x**2, x, x**3)
axes[1].axis('tight')
axes[1].set_title("tight axes")

# Panel 2: explicit limits that zoom in on part of the curves.
axes[2].plot(x, x**2, x, x**3)
axes[2].set_ylim([0, 60])
axes[2].set_xlim([2, 5])
axes[2].set_title("custom axes range")

In [None]:
# II. Seaborn
# 1. Load a test dataset

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

# List the example datasets that sns.load_dataset can fetch by name.
sns.get_dataset_names()

In [None]:
# Load seaborn's "tips" example dataset and preview its first five rows.
tips = sns.load_dataset(name="tips")
tips.head(n=5)

In [None]:
# 2. Scatter plot

In [None]:
# Axes-level scatter plot of tip against total bill; returns the Axes it drew on.
ax = sns.scatterplot(data=tips, x="total_bill", y="tip")

In [None]:
# Figure-level version of the same scatter plot, with colour encoding the
# "sex" column and marker size encoding the "size" column.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    kind="scatter",
    hue="sex",
    size="size",
)

In [None]:
# 3. Categorical functions

In [None]:
# Strip plot: one point per row, total bill by sex, coloured by day.
sns.catplot(
    data=tips,
    x='sex',
    y='total_bill',
    hue='day',
    kind='strip',
)

In [None]:
# Box plot of the same grouping: total bill by sex, split by day.
sns.catplot(
    data=tips,
    x='sex',
    y="total_bill",
    hue="day",
    kind="box",
)

# III. Exercises

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Location of the job-market CSV.
# NOTE(review): this is an absolute /content/drive path (presumably a mounted
# Colab Drive), so the notebook only runs where that path exists.
job_market_csv = '/content/drive/MyDrive/Colab_Notebooks/Data_Mining/Thuc_hanh/job-market.csv'

# Read the postings into a DataFrame and preview the first rows.
df = pd.read_csv(job_market_csv)
df.head()

In [None]:
# Count the non-null 'Title' values per 'Location', largest count first.
jobs_by_location = (
    df.groupby('Location')['Title']
    .count()
    .sort_values(ascending=False)
)
jobs_by_location

In [None]:
# Horizontal bar chart of posting counts per location.
sns.set(style="whitegrid")
plt.figure(figsize=(20, 20))

# Locations go on the y axis (one bar each); bar length is the posting count.
sns.barplot(y=jobs_by_location.index, x=jobs_by_location.values)

# Title and labels describe what is plotted: the series is grouped by
# 'Location', and the y axis holds the location names.
plt.title('Number of Jobs by Location')
plt.xlabel('Number of jobs')
plt.ylabel('Location')
plt.show()

In [None]:
# Bucket each posting by its 'HighestSalary'. pd.cut uses right-closed
# intervals by default: (0, 30], (30, 40], (40, 50], (50, 60]. Values outside
# the bins become NaN.
# NOTE(review): the units of the bin edges are not visible here; presumably
# thousands. Confirm against the dataset.
df['salary_range'] = pd.cut(df['HighestSalary'], bins=[0, 30, 40, 50, 60])

# Midpoint between the lowest and highest advertised salary.
df['mean_salary'] = (df['LowestSalary'] + df['HighestSalary']) / 2

# Postings per salary bucket. 'salary_range' is categorical, so observed=False
# is spelled out: it keeps empty buckets in the result (the long-standing
# default) and avoids the FutureWarning newer pandas raises when it is omitted.
counts = df.groupby('salary_range', observed=False).size().reset_index(name='count')

counts

In [None]:
# Set the palette before any Axes exists. An Axes takes its colour cycle from
# rcParams when it is created, and plt.title() below creates the Axes, so a
# palette set after that call would not affect this pie chart.
sns.set_palette('pastel')

plt.figure(figsize=(8, 8))
plt.title('Job Posts by Salary Range')

# One wedge per salary bucket, annotated with its percentage share.
plt.pie(counts['count'], labels=counts['salary_range'], autopct='%1.1f%%')
plt.show()

In [None]:
# Count the non-null 'Title' values per 'Area'.
# NOTE(review): the variable name says "company" but the grouping key is
# 'Area'. The name is kept so other cells that reference it keep working.
jobs_by_company = df.groupby('Area')['Title'].count()

sns.set(style="whitegrid")
plt.figure(figsize=(10, 10))

# Vertical bars: areas along x, posting counts along y.
sns.barplot(y=jobs_by_company.values, x=jobs_by_company.index)

# Title and labels follow the plotted data: x holds the areas, y the counts.
plt.title('Number of Jobs by Area')
plt.xlabel('Area')
plt.xticks(rotation=90)  # rotate the x tick labels so they do not overlap
plt.ylabel('Number of jobs')
plt.show()

2. Data Correlation (Advanced and Optional)

In [None]:
# Location of the wine CSV.
# NOTE(review): this is an absolute /content/drive path (presumably a mounted
# Colab Drive), so the notebook only runs where that path exists.
wine_csv = "/content/drive/MyDrive/Colab_Notebooks/Data_Mining/Thuc_hanh/wine.data.csv"

# Read the data and preview the first ten rows.
df2 = pd.read_csv(wine_csv)
df2.head(n=10)

In [None]:
# Split the frame by position: the first column becomes `label`, and every
# remaining column goes into `data`.
label, data = df2.iloc[:, 0], df2.iloc[:, 1:]

In [None]:
# Pairwise relationships between all columns of `data`, which excludes the
# first column of df2.
sns.set(style='ticks')
sns.pairplot(data)
plt.show()

In [None]:
# Pairwise correlation between the columns of `data`.
corr_matrix = data.corr()

# Annotated heatmap of the matrix on a diverging blue-to-red colour map.
plt.figure(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
)
plt.title('Correlation Heatmap')
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise the feature columns to zero mean and unit variance before
# clustering. `data` (df2 without its first column) is used instead of the
# whole of df2. That keeps the column split off as `label` out of the
# clustering input. It also makes this cell give the same result when it is
# re-run after a 'cluster' column has been added to df2.
scaler = StandardScaler()
wine_scaled = scaler.fit_transform(data)

In [None]:
!pip3 install KMeans

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Fit a 3-cluster k-means model on the scaled matrix. The fixed random_state
# makes the centroid initialisation repeatable.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(wine_scaled)

# One cluster index per row of wine_scaled.
wine_clusters = kmeans.labels_

In [None]:
# Attach the cluster assignment to the frame so it can be used as a plot hue.
# NOTE(review): this mutates df2 in place, so any cell re-run afterwards that
# reads the whole of df2 will also pick up this column.
df2['cluster'] = wine_clusters

In [None]:
# Pairwise scatter plots of every df2 column, coloured by assigned cluster,
# with histograms on the diagonal.
sns.pairplot(
    df2,
    hue='cluster',
    diag_kind="hist",
)