In [1]:
import pandas as pd

```
1. Title: Balloon databases

2. Source: Michael Pazzani (pazzani@ics.uci.edu)

3. Past usage
   Pazzani, M. (1991). The influence of prior knowledge on concept acquisition: 
   Experimental and computational results. Journal of Experimental Psychology: 
   Learning, Memory & Cognition, 17, 3,  416-432.

4. Relevant information:
   There are four data sets representing different conditions of an experiment.
   All have the same attributes.
   a. adult-stretch.data  Inflated is true if age=adult or act=stretch
   b. adult+stretch.data  Inflated is true if age=adult and act=stretch
   c. small-yellow.data   Inflated is true if (color=yellow and size = small)
   d. small-yellow+adult-stretch.data  Inflated is true if 
            (color=yellow and size = small) or (age=adult and act=stretch)
 
5. Number of attributes: 4

6. Number of Instances: 16

7. Attribute Information: (Classes Inflated T or F)
   Color             yellow, purple
   size              large, small
   act               stretch, dip
   age               adult, child
   inflated          T, F

8. Missing Values: None
```

In [3]:
# Location of the "adult+stretch" variant of the UCI balloons data set.
FULL_DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/balloons/adult+stretch.data'

# Quick preview of the remote file. Without header=None pandas would use the
# first line of the file as column labels; the data file appears to have no
# header row (the later load into balloons_df also passes header=None), so
# that would silently drop one record from the preview.
pd.read_csv(FULL_DATA_URL, header=None).head()

In [4]:
# Load the data set; header=None tells pandas not to treat the first line as
# column names, so the columns get default integer labels for now.
balloons_df = pd.read_csv(filepath_or_buffer=FULL_DATA_URL, header=None)

In [5]:
# Show the frame exactly as loaded (loaded with header=None, so no column names yet).
balloons_df

In [6]:
# Column labels, in file order: the four attributes followed by the class label.
balloons_column_names = ['color', 'size', 'act', 'age', 'inflated']

# Replace the existing column labels on the loaded frame with the names above.
balloons_df.columns = balloons_column_names



In [7]:
# Peek at the first five rows to confirm the column names were applied.
balloons_df.head(n=5)

In [8]:
# Partition the rows by balloon color.
balloons_by_color = balloons_df.groupby(by='color')

In [9]:
# Mapping of each color value to the index labels of the rows in that group.
balloons_by_color.groups

In [10]:
# Number of non-null entries per remaining column, for each color.
balloons_by_color.count()

In [11]:
# Partition the rows by every (color, size) combination.
balloons_by_color_and_size = balloons_df.groupby(by=['color', 'size'])

In [12]:
# Number of non-null entries per remaining column, for each (color, size) pair.
balloons_by_color_and_size.count()

In [13]:
# Row counts for every (color, size, act) combination in the full frame.
balloons_df.groupby(by=['color', 'size', 'act']).count()

In [14]:
# Drop rows that repeat an earlier row in every column, keeping the first
# occurrence; the original frame is left untouched.
balloons_reduced_df = balloons_df.drop_duplicates(keep='first')

In [15]:
# (rows, columns) of the full frame followed by the de-duplicated frame.
tuple(frame.shape for frame in (balloons_df, balloons_reduced_df))

In [16]:
# Row counts for every (color, size, act) combination in the full frame.
balloons_df.groupby(by=['color', 'size', 'act']).count()

In [17]:
# De-duplicated rows counted per (color, size, act, inflated) combination.
balloons_reduced_df.groupby(by=['color', 'size', 'act', 'inflated']).count()

In [18]:
# Same counts with act moved ahead of size in the grouping hierarchy.
balloons_reduced_df.groupby(by=['color', 'act', 'size', 'inflated']).count()

In [19]:
# Same counts with act as the outermost grouping level.
balloons_reduced_df.groupby(by=['act', 'color', 'size', 'inflated']).count()

In [20]:
# Mapping of each act value to the index labels of its de-duplicated rows.
# Grouping by the scalar key (not a one-element list) keeps the group labels
# as plain strings.
balloons_reduced_df.groupby('act').groups

In [21]:
# Build the act grouping once and reuse it for both lookups. Grouping by the
# scalar key (not a one-element list) keeps the group labels as plain strings.
act_groups = balloons_reduced_df.groupby('act').groups

# Index labels of the de-duplicated rows for each act value.
dip = act_groups['DIP']
stretch = act_groups['STRETCH']

In [22]:
# `dip` holds index labels taken from balloons_reduced_df, so select from that
# same frame rather than relying on the labels also existing in balloons_df.
# Counts the DIP rows per inflated value.
balloons_reduced_df.loc[dip].groupby('inflated').count()

In [23]:
# `stretch` holds index labels taken from balloons_reduced_df, so select from
# that same frame rather than relying on the labels also existing in balloons_df.
# Counts the STRETCH rows per (age, inflated) combination.
balloons_reduced_df.loc[stretch].groupby(['age', 'inflated']).count()

In [24]:
# Row counts in the full frame for every (act, age, inflated) combination.
balloons_df.groupby(by=['act', 'age', 'inflated']).count()

In [25]:
class MyVector:
    """Minimal vector wrapper that supports element-wise addition.

    The wrapped sequence is stored as-is in ``values``. Adding two vectors
    pairs their elements with ``zip``, so when the lengths differ the result
    is truncated to the shorter operand.
    """

    def __init__(self, values):
        """Store ``values`` (any iterable of addable items) without copying."""
        self.values = values

    def __add__(self, other):
        """Return a new MyVector holding the element-wise sum ``self + other``.

        Returns ``NotImplemented`` for operands that are not MyVector so that
        Python can try the reflected operation or raise ``TypeError``.
        """
        if not isinstance(other, MyVector):
            return NotImplemented
        return MyVector([j + k for j, k in zip(self.values, other.values)])

    def __radd__(self, other):
        """Handle ``other + self`` when ``other`` does not know how to add.

        A numeric zero on the left is treated as the additive identity, which
        is what makes the built-in ``sum()`` work on a list of vectors (it
        starts from ``0``). A fresh list is returned in that case so the
        result never aliases this vector's storage.
        """
        if isinstance(other, MyVector):
            # Reflected call: keep the operand order as other + self.
            return other.__add__(self)
        if isinstance(other, (int, float)) and other == 0:
            return MyVector(list(self.values))
        return NotImplemented

    def __repr__(self):
        """Show the vector as the string form of its underlying values."""
        return str(self.values)


In [26]:
# Two sample vectors; the final expression displays their element-wise sum.
u, v = MyVector([1, 2, 3]), MyVector([4, 5, 6])
u + v