In [41]:
import pandas as pd
import math
import numpy as np
from lux.vis.VisList import VisList

def similar_pattern(ldf, *args, topK=15):
    '''
    Generates visualizations with similar patterns to a query visualization.

    The query is built from the intent stored on ``ldf``: the last intent clause is
    dropped and replaced by a clause over every unique value of the last filter's
    attribute. Candidate visualizations are normalized via ``preprocess`` and scored
    by euclidean distance to the first query visualization.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame whose intent contains exactly one clause with a non-empty value.

    *args
        Extra positional arguments; accepted and ignored.

    topK : int, keyword-only, default 15
        number of visual recommendations to return; -1 keeps every candidate.

    Returns
    -------
    recommendations : Dict[str,obj] or None
        object with a collection of visualizations that result from the Similarity action,
        or None (after printing a message) when the intent does not qualify.
    '''
    # Read the intent from the frame that was passed in, not from a notebook-level global.
    intent = ldf.intent
    row_specs = [clause for clause in intent if clause.value != ""]
    filter_specs = get_filter_specs(intent)

    # Validate before indexing so an intent without a filter cannot raise IndexError.
    if len(row_specs) != 1 or not filter_specs:
        print("Query needs to have 1 row value")
        return None

    last = filter_specs[-1]
    # NOTE(review): this slice assumes the filter clause is the last entry of the intent — confirm.
    query = intent.copy()[0:-1]
    # array of possible values for attribute
    arr = ldf[last.attribute].unique().tolist()
    query.append(lux.Clause(last.attribute, last.attribute, arr))

    # NOTE(review): the recorded run of this notebook fails with
    # "AttributeError: 'VisList' object has no attribute 'get_attr_by_channel'", i.e. an item
    # handed to preprocess() is a VisList rather than a Vis. Confirm what these two VisList
    # constructions yield, and whether the search space and the query are swapped here.
    search_space_vc = VisList(ldf.current_vis,ldf)
    query_vc = VisList(query,ldf)
    query_vis = query_vc[0]
    preprocess(query_vis)

    recommendation = {"action":"Similarity",
                           "description":"Show other charts that are visually similar to the Current vis."}
    # assign each candidate its euclidean distance to the query
    for vis in search_space_vc:
        preprocess(vis)
        vis.score = euclidean_dist(query_vis, vis)
    search_space_vc.normalize_score(invert_order=True)
    if topK != -1:
        search_space_vc = search_space_vc.topK(topK)
    recommendation["collection"] = search_space_vc
    return recommendation

def aggregate(vis):
    '''
    Collapses the vis data to one row per x value by averaging the y values.

    Parameters
    ----------
    vis : lux.vis.Vis
        candidate visualization; its ``data`` is replaced in place when both an
        x and a y channel are present, and left untouched otherwise.
    Returns
    -------
    None
    '''
    x_clauses = vis.get_attr_by_channel("x")
    if not x_clauses:
        return
    y_clauses = vis.get_attr_by_channel("y")
    if not y_clauses:
        return

    x_col = x_clauses[0].attribute
    y_col = y_clauses[0].attribute

    # keep only the two plotted columns, then average y within each x group
    plotted = vis.data[[x_col, y_col]]
    grouped = plotted.groupby(x_col, as_index=False)
    averaged = grouped.agg({y_col: 'mean'})
    vis.data = averaged.copy()

def interpolate(vis,length):
    '''
    Interpolates the vis data so that the number of data points is fixed to a constant

    The x range is split into ``length`` equal steps starting at the first x value
    (the final x value itself is not sampled), and y is linearly interpolated between
    the two neighbouring data points. The x values are expected to be in ascending order.

    Parameters
    ----------
    vis : lux.vis.Vis
        vis that represents the candidate visualization; ``vis.data`` is replaced
        when both x and y channels are present and the data is non-empty.
    length : int
        number of points a vis should have; must be at least 1.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        if ``length`` is smaller than 1.
    '''
    if vis.get_attr_by_channel("x") and vis.get_attr_by_channel("y"):

        xAxis = vis.get_attr_by_channel("x")[0].attribute
        yAxis = vis.get_attr_by_channel("y")[0].attribute

        if xAxis and yAxis:
            if length < 1:
                raise ValueError("length must be a positive integer")

            # Plain lists give positional access, so a non-default DataFrame index
            # (e.g. after filtering) cannot break the lookups below.
            x_vals = vis.data[xAxis].tolist()
            y_vals = vis.data[yAxis].tolist()
            if not x_vals:
                # nothing to interpolate
                return

            last_pos = len(x_vals) - 1
            granularity = (x_vals[last_pos] - x_vals[0]) / length

            interpolated_x_vals = []
            interpolated_y_vals = []
            pos = 0

            for i in range(length):
                interpolated_x = x_vals[0] + i * granularity

                # advance to the first data point at or beyond the target, never past the end
                while pos < last_pos and x_vals[pos] < interpolated_x:
                    pos += 1

                if pos == 0 or x_vals[pos] == interpolated_x or x_vals[pos] == x_vals[pos-1]:
                    # exact hit, or no usable left neighbour to interpolate from
                    interpolated_y = y_vals[pos]
                else:
                    x_diff = x_vals[pos] - x_vals[pos-1]
                    y_diff = y_vals[pos] - y_vals[pos-1]
                    interpolated_y = y_vals[pos-1] + (interpolated_x - x_vals[pos-1]) / x_diff * y_diff

                interpolated_x_vals.append(interpolated_x)
                interpolated_y_vals.append(interpolated_y)

            vis.data = pd.DataFrame(list(zip(interpolated_x_vals, interpolated_y_vals)),columns = [xAxis, yAxis])

# interpolate dataset

def normalize(vis):
    '''
    Rescales the y column of the vis data to the 0-1 range (min-max scaling).

    The data is left unchanged when the vis has no y channel, when all y values
    are equal, or when the spread between the largest and smallest y value is
    below 1.

    Parameters
    ----------
    vis : lux.vis.Vis
        vis that represents the candidate visualization
    Returns
    -------
    None
    '''
    y_clauses = vis.get_attr_by_channel("y")
    if not y_clauses:
        return

    column = y_clauses[0].attribute
    upper = vis.data[column].max()
    lower = vis.data[column].min()

    # constant column: nothing to scale
    if upper == lower:
        return
    spread = upper - lower
    # spreads below 1 are deliberately skipped as well
    if spread < 1:
        return

    vis.data[column] = (vis.data[column] - lower) / spread

def euclidean_dist(query_vis, vis):
    '''
    Scores how far apart two visualizations are using the euclidean norm of the
    difference between their y columns.

    Parameters
    ----------
    query_vis : lux.vis.Vis
        vis that represents the query pattern
    vis : lux.vis.Vis
        vis that represents the candidate visualization

    Returns
    -------
    score : float
        euclidean distance score; 0 (after printing a message) when either vis
        has no y channel
    '''
    # guard clause: both visualizations need a y channel
    if not (query_vis.get_attr_by_channel("y") and vis.get_attr_by_channel("y")):
        print("no y axis detected")
        return 0

    candidate_column = vis.get_attr_by_channel("y")[0].attribute
    query_column = query_vis.get_attr_by_channel("y")[0].attribute

    difference = vis.data[candidate_column].values - query_vis.data[query_column].values
    return np.linalg.norm(difference)
def preprocess(vis):
    '''
    Prepares a vis for similarity comparison by running the preprocessing steps
    on it in order. Only normalization is currently active.

    Parameters
    ----------
    vis : lux.vis.Vis
        vis that represents the candidate visualization
    Returns
    -------
    None
    '''
    # aggregate(vis) and interpolate(vis, 100) are currently disabled
    steps = (normalize,)
    for step in steps:
        step(vis)



In [11]:
import pandas as pd
import lux
from lux.vis.Clause import Clause
from lux.utils.utils import get_filter_specs


In [19]:
# Load the cars dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("../lux/data/car.csv")
df["Year"] = pd.to_datetime(df["Year"], format='%Y') # convert the "Year" column to a pandas datetime dtype

# Intent: Year on x, Displacement on y, restricted by the filter Origin=USA.
df.set_intent([lux.Clause("Year",channel="x"), 
               lux.Clause("Displacement",channel="y"), 
               lux.Clause("Origin=USA")])
# Alternative intent (disabled); the column names suggest a different dataset than car.csv.
# df.set_intent([lux.Clause("SATAverage",channel="x"), 
#                lux.Clause("AdmissionRate",channel="y"), 
#                lux.Clause("HighestDegree=Graduate")])

# Scratch inspection (disabled).
# df.current_vis[0].mark
# len(get_filter_specs(df.intent))
# Render the HTML repr explicitly — presumably this is what populates df.recommendation; confirm.
df._repr_html_()
# Score of the first vis under the "Similarity" recommendation (recorded output: 1.0).
# NOTE(review): this cell's execution count (19) precedes the cell that defines similar_pattern (41),
# so the recorded score may come from an earlier definition — re-run top to bottom to confirm.
df.recommendation["Similarity"][0].score

Button(description='Toggle Pandas/Lux', layout=Layout(top='5px', width='140px'), style=ButtonStyle())

Output()

1.0

In [24]:
def one_current_vis(ldf):
    '''
    Condition used when registering the similarity action.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame to check.

    Returns
    -------
    bool
        True when ``ldf.current_vis`` is not None and holds exactly one item.
    '''
    return ldf.current_vis is not None and len(ldf.current_vis) == 1

In [42]:
# Reload the cars dataset so the frame starts from a clean state; path is relative to the working directory.
df = pd.read_csv("../lux/data/car.csv")
df["Year"] = pd.to_datetime(df["Year"], format='%Y') # convert the "Year" column to a pandas datetime dtype
# df["Month"] = pd.to_datetime(df["Month"], format='%M') # disabled: same conversion for a "Month" column

# Intent: Year on x, Displacement on y, restricted by the filter Origin=USA.
df.set_intent([lux.Clause("Year",channel="x"), 
               lux.Clause("Displacement",channel="y"), 
               lux.Clause("Origin=USA")])
# last = get_filter_specs(df.intent)[-1]
# query = df.intent.copy()[0:-1]
# # array of possible values for attribute
# arr = df[last.attribute].unique().tolist()
# query.append(lux.Clause(last.attribute, last.attribute, arr))
# query_vc = VisList(query,df)     
# query_vc

# search_space_vc = VisList(df.current_vis,df)
        
# preprocess(query_vis)
# #for loop to create assign euclidean distance
# recommendation = {"action":"Similarity",
#                                "description":"Show other charts that are visually similar to the Current vis."}
# for vis in search_space_vc:
#     preprocess(vis)
#     vis.score = euclidean_dist(query_vis, vis)
# search_space_vc.normalize_score(invert_order=True)

In [43]:
# Register similar_pattern under the name "similarity"; one_current_vis is passed as the third
# argument (presumably the condition under which the action is shown — confirm against the Lux docs).
lux.register_action("similarity", similar_pattern, one_current_vis)

In [44]:
# Display the dataframe. In the recorded run the Lux widget failed with
# "AttributeError: 'VisList' object has no attribute 'get_attr_by_channel'" and fell back to the pandas display.
df

Unexpected error in rendering Lux widget and recommendations. Falling back to Pandas display.
Please report the following issue on Github: https://github.com/lux-org/lux/issues 

AttributeError: 'VisList' object has no attribute 'get_attr_by_channel'



Unnamed: 0,Name,MilesPerGal,Cylinders,Displacement,Horsepower,Weight,Acceleration,Year,Origin,Brand
0,chevrolet chevelle malibu,18.0,8,307.0,130,3504,12.0,1970-01-01,USA,chevrolet
1,buick skylark 320,15.0,8,350.0,165,3693,11.5,1970-01-01,USA,buick
2,plymouth satellite,18.0,8,318.0,150,3436,11.0,1970-01-01,USA,plymouth
3,amc rebel sst,16.0,8,304.0,150,3433,12.0,1970-01-01,USA,amc
4,ford torino,17.0,8,302.0,140,3449,10.5,1970-01-01,USA,ford
...,...,...,...,...,...,...,...,...,...,...
387,ford mustang gl,27.0,4,140.0,86,2790,15.6,1982-01-01,USA,ford
388,vw pickup,44.0,4,97.0,52,2130,24.6,1982-01-01,Europe,volkswagen
389,dodge rampage,32.0,4,135.0,84,2295,11.6,1982-01-01,USA,dodge
390,ford ranger,28.0,4,120.0,79,2625,18.6,1982-01-01,USA,ford




In [10]:
# Inspect df.current_vis; the recorded output shows a single line-mark Vis
# (x: Year, y: MEAN(Displacement), filter Origin=USA).
df.current_vis

LuxWidget(recommendations=[{'action': 'Vis List', 'description': 'Shows a vis list defined by the intent', 'vs…

[<Vis  (x: Year, y: MEAN(Displacement)  -- [Origin=USA] ) mark: line, score: 0.00 >]