Skip to content

Commit

Permalink
PERF: Updated andrews_curves to use a Numpy array for samples
Browse files Browse the repository at this point in the history
DOC: Added some documentation to andrews_curves
TST: Added a variable length test to TestDataFramePlots.test_andrews_curves
  • Loading branch information
Kyle committed Nov 6, 2015
1 parent 091df3e commit cbaae56
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 9 deletions.
20 changes: 20 additions & 0 deletions pandas/tests/test_graphics_others.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,26 @@ def test_andrews_curves(self):
cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique()))
self._check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10])

length = 10
df = DataFrame({"A": random.rand(length),
"B": random.rand(length),
"C": random.rand(length),
"Name": ["A"] * length})

_check_plot_works(andrews_curves, frame=df, class_column='Name')

rgba = ('#556270', '#4ECDC4', '#C7F464')
ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=rgba)
self._check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10])

cnames = ['dodgerblue', 'aquamarine', 'seagreen']
ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=cnames)
self._check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10])

ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', colormap=cm.jet)
cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique()))
self._check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10])

colors = ['b', 'g', 'r']
df = DataFrame({"A": [1, 2, 3],
"B": [1, 2, 3],
Expand Down
27 changes: 18 additions & 9 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,15 @@ def normalize(series):
def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
colormap=None, **kwds):
"""
Generates a matplotlib plot of Andrews curves, for visualising clusters of multivariate data.
Andrews curves have the functional form:
f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + x_4 sin(2t) + x_5 cos(2t) + ...
where the x coefficients correspond to the values of each dimension and t is linearly spaced between -pi and +pi.
Each row of frame then corresponds to a single curve.
Parameters:
-----------
frame : DataFrame
Expand All @@ -527,28 +536,28 @@ def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
ax: Matplotlib axis object
"""
from math import sqrt, pi, sin, cos
from math import sqrt, pi
import matplotlib.pyplot as plt

def function(amplitudes):
def f(x):
def f(t):
x1 = amplitudes[0]
result = x1 / sqrt(2.0)
harmonic = 1.0
for x_even, x_odd in zip(amplitudes[1::2], amplitudes[2::2]):
result += (x_even * sin(harmonic * x) +
x_odd * cos(harmonic * x))
result += (x_even * np.sin(harmonic * t) +
x_odd * np.cos(harmonic * t))
harmonic += 1.0
if len(amplitudes) % 2 != 0:
result += amplitudes[-1] * sin(harmonic * x)
result += amplitudes[-1] * np.sin(harmonic * t)
return result
return f

n = len(frame)
class_col = frame[class_column]
classes = frame[class_column].drop_duplicates()
df = frame.drop(class_column, axis=1)
x = [-pi + 2.0 * pi * (t / float(samples)) for t in range(samples)]
t = np.linspace(-pi, pi, samples)
used_legends = set([])

color_values = _get_standard_colors(num_colors=len(classes),
Expand All @@ -560,14 +569,14 @@ def f(x):
for i in range(n):
row = df.iloc[i].values
f = function(row)
y = [f(t) for t in x]
y = f(t)
kls = class_col.iat[i]
label = com.pprint_thing(kls)
if label not in used_legends:
used_legends.add(label)
ax.plot(x, y, color=colors[kls], label=label, **kwds)
ax.plot(t, y, color=colors[kls], label=label, **kwds)
else:
ax.plot(x, y, color=colors[kls], **kwds)
ax.plot(t, y, color=colors[kls], **kwds)

ax.legend(loc='upper right')
ax.grid()
Expand Down

0 comments on commit cbaae56

Please sign in to comment.