-
Notifications
You must be signed in to change notification settings - Fork 4
/
03_data_visualization_a_python_plots.txt
171 lines (137 loc) · 5.65 KB
/
03_data_visualization_a_python_plots.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#########
plt.grid(True)
plt.xlim(0,2000)
#########
------------------------------------------------------------
######################### UNIVARIATE ANALYSIS ##############
------------------------------------------------------------
##BARPLOTS (Univariate Analysis)
#Bar charts of categorical data (Univariate Analysis)
train['Gender'].value_counts().head(100).plot.bar()
plt.show()
#Histogram (Univariate Analysis) - For Numeric Data
train['Interest_Rate'].plot.hist()
plt.show()
#sns.distplot = pandas hist
sns.distplot(train['Interest_Rate'].dropna(), bins=5, kde=False)
plt.show()
#bar chart sorted by x-axis
train['City_Category'].value_counts().sort_index().head(100).plot.bar()
plt.show()
#pandas bar plot = seaborn count plot
# Draws one bar per distinct 'Gender' value, bar height = number of rows.
# The column is passed by name together with data= (the same calling style
# the jointplot/boxplot snippets in this file use) instead of positionally.
sns.countplot(x='Gender', data=train)
plt.show()
#when values are precalculated
import matplotlib.pyplot as plt
df.plot(x='col_name1', y= "dependent_variable",kind='bar')
#fig = plt.figure()
#ax = fig.add_subplot(111)
#xlabel : ax.set_xlabel('xlabel')
#ylabel : ax.set_ylabel('ylabel')
#axes title : ax.set_title('axes title')
#For more see: https://matplotlib.org/users/text_intro.html
plt.show()
-----------------------------------------------------------
##LINECHARTS (Univariate Analysis)
#Line charts of numeric data (Univariate Analysis)
train['Monthly_Income'].value_counts().sort_index().plot.line()
plt.show()
#pandas line charts sorted by x axis
train['EMI'].value_counts().sort_index().plot.line()
plt.show()
#multiple line plots: df is a dataframe with columns date, media, count; each media value is plotted as its own line in a different color
#data [it is grouped by (date,media)]
date,media,count
21-03-2017,facebook,4
21-03-2017,tv,5
22-03-2017,facebook,8
# One line per `media` value (hue), with `date` on the x-axis and `count` on the y-axis.
# `fit_reg` is an lmplot option and is not accepted by this plot, so it is dropped.
# catplot(kind='point') is the current spelling of the old factorplot default.
sns.catplot(x='date', y='count', hue='media', data=df, kind='point')
plt.show()
---------------------------------------------------------
##KDE (Univariate Analysis)
#Kernel Density Estimate Plot (Univariate Analysis) : does smoothing
sns.kdeplot(train['Interest_Rate'].dropna())
plt.show()
---------------------------------------------------------
##PIECHARTS (Univariate Analysis)
train['Source_Category'].value_counts().head(10).plot.pie()
plt.gca().set_aspect('equal')
plt.show()
---------------------------------------------------------
##BOXPLOT (Univariate Analysis)
sns.boxplot(data=train, x = 'N34')
plt.show()
------------------------------------------------------------
######################### BIVARIATE ANALYSIS ##############
------------------------------------------------------------
##SCATTER PLOT (Bivariate Analysis)
train.plot.scatter(x='Loan_Amount', y='EMI')
plt.show()
---------------------------------------------------------------------
##LINE PLOT (Bivariate Analysis)
# DataFrame has no `.line` attribute; line plots are reached through `.plot.line`.
# Rows are sorted by the x column first so the line is drawn left to right
# rather than zig-zagging in row order.
train.sort_values('Loan_Amount').plot.line(x='Loan_Amount', y='EMI')
plt.show()
---------------------------------------------------------------------
##KERNEL DENSITY ESTIMATE PLOT (Bivariate Analysis)
# Name both variables explicitly (x= and y=) so a single joint 2-D density is drawn.
# Rows with a missing value in either column are dropped before plotting.
sns.kdeplot(x='Loan_Amount', y='Approved', data=train[['Loan_Amount', 'Approved']].dropna())
plt.show()
---------------------------------------------------------------------
##HEXPLOT (A hexplot aggregates points in space into hexagons, and then colorize those hexagons) (Bivariate Analysis)
train.plot.hexbin(x='Loan_Amount', y='EMI', gridsize=15)
plt.show()
---------------------------------------------------------------------
##JOINTPLOT - combine scatter and hexplot (Bivariate Analysis)
sns.jointplot(x='Loan_Amount', y='EMI', data=train[['Loan_Amount', 'EMI']].dropna())
plt.show()
sns.jointplot(x='Loan_Amount', y='EMI', data=train[['Loan_Amount', 'EMI']].dropna(), kind='hex', gridsize=20)
plt.show()
---------------------------------------------------------------------
##STACKED PLOTS (Bivariate Analysis)
# Per-Source_Category mean of the three numeric columns used by the stacked plots.
# The columns are selected BEFORE aggregating, so only these three are averaged and
# non-numeric columns of `train` (e.g. 'Gender', 'City_Category') never reach mean().
train_stats_as_per_source_category = (
    train.groupby('Source_Category')[['Loan_Amount', 'Existing_EMI', 'EMI']].mean()
)
train_stats_as_per_source_category.head()
'''
Loan_Amount Existing_EMI EMI
Source_Category
A 16500.000000 70.000000 848.000000
'''
#STACKED BAR
train_stats_as_per_source_category.plot.bar(stacked=True)
plt.show()
#STACKED AREA
train_stats_as_per_source_category.plot.area()
plt.show()
---------------------------------------------------------------------
##BOXPLOT (Bivariate Analysis)
sns.boxplot(x='Source_Category', y='Loan_Amount',data=train)
plt.show()
---------------------------------------------------------------------
##VIOLIN PLOT (Bivariate Analysis)
sns.violinplot(x='Source_Category', y='Loan_Amount',data=train)
plt.show()
------------------------------------------------------------
######################### MULTI-VARIATE ANALYSIS ##############
------------------------------------------------------------
##PAIRPLOT (Multivariate plots)
sns.pairplot(train[['Existing_EMI', 'Loan_Amount', 'Monthly_Income']].dropna())
plt.show()
#Facet Grid (Multivariate plots) : #A FacetGrid is an object which stores some information on how you want to break up your data visualization.
g = sns.FacetGrid(train, col="Source_Category")
g.map(sns.kdeplot, "Loan_Amount")
plt.show()
---------------------------------------------------------------------
##SUB PLOTS (Multivariate plots)
#fig, axarr = plt.subplots(<number_of_rows>, <number_of_columns>, figsize=(<along_x_axis>, <along_y_axis>))
fig, axarr = plt.subplots(2, 2, figsize=(12, 8))
plt.show()
---------------------------------------------------------------------
##SCATTER PLOTS (Multivariate plots)
# Scatter of Existing_EMI against Monthly_Income, colored by Source_Category.
# fit_reg=False suppresses the regression line so only the points are shown.
# The call was previously split in the middle of the name `train`; it is rejoined here.
sns.lmplot(x='Monthly_Income', y='Existing_EMI', hue='Source_Category',
           data=train.dropna(), fit_reg=False)
plt.show()
---------------------------------------------------------------------
##HEAT MAP (Multivariate Plots) / Correlation Plot
sns.heatmap(train[['Monthly_Income', 'EMI', 'Existing_EMI']].corr(),annot=True)
plt.show()