#### In this kernel, we try to validate the hypothesis that the share of image-data competitions has increased lately
#### and that their count has overtaken the count of tabular-data competitions on Kaggle.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression statement in a cell, not just
# the last one. Later cells rely on this to show several previews
# (e.g. head()/sample() calls) from a single cell.
InteractiveShell.ast_node_interactivity = "all"

import os
# Recursively list every file under the Kaggle input directory, one full
# path per line. The generator is lazy, so paths are printed as the
# directory tree is walked.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

#### We will need the list of competitions, tags and mapping of competitions to tags

In [None]:
# Load the three Meta Kaggle tables used in this notebook, in order:
# competitions, the competition-to-tag mapping, and the tag definitions.
comp, comp_tags, tags = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/meta-kaggle/Competitions.csv",
        "/kaggle/input/meta-kaggle/CompetitionTags.csv",
        "/kaggle/input/meta-kaggle/Tags.csv",
    )
)

# Quick previews of each table (competitions transposed so that the
# columns read as rows).
comp.head().T
comp_tags.head()
tags.sample(5)

#### Let's find the most popular competition tags

In [None]:
# Sum CompetitionCount per tag slug, keep the slugs whose total exceeds 5,
# and draw them as a bar chart in ascending order of the total.
slug_totals = tags.groupby(['Slug']).agg({'CompetitionCount' : 'sum'})
popular_slugs = slug_totals.query('CompetitionCount > 5')
popular_slugs.sort_values('CompetitionCount').plot(kind = 'bar')

In [None]:
# Show the tag rows for the three data-type slugs of interest.
data_type_slug_filter = 'Slug == "image-data" or Slug == "tabular-data" or Slug == "text-data"'
tags.query(data_type_slug_filter)

#### We could have also used the Parent Tag ID to get the types of competition data sources

In [None]:
# Select the tags whose parent tag is 14100, keep only their slug and
# competition count, and chart the counts in ascending order.
child_tag_counts = tags.query('ParentTagId == 14100')[['Slug', 'CompetitionCount']]
child_tag_counts = child_tag_counts.sort_values('CompetitionCount')
child_tag_counts.plot(kind = 'bar', x = 'Slug', y = 'CompetitionCount')

#### Let's keep only the competitions that carry one of these 3 tags - text, image and tabular data

In [None]:
# Keep only the competition-to-tag rows for the three tag IDs of interest.
data_type_tag_filter = 'TagId == 14101 or TagId == 14102 or TagId == 14104'
comp_data = comp_tags.query(data_type_tag_filter)
comp_data.sample(5)

In [None]:
# Reduce the mapping to (CompetitionId, TagId) pairs and attach the full
# competition record to each pair; the inner join drops pairs whose
# competition is not present in the competitions table.
tag_pairs = comp_data[['CompetitionId', 'TagId']]
comp_data = tag_pairs.merge(comp, left_on = 'CompetitionId', right_on = 'Id', how = 'inner')
comp_data.sample(5).T

In [None]:
# Keep only the competitions whose CanQualifyTiers flag is True.
# The explicit `== True` comparison is kept so that any non-boolean or
# missing values in the column are excluded exactly as before.
# .copy() makes the filtered result an independent DataFrame: columns are
# assigned on comp_data later in the notebook, and doing that on a
# boolean-mask selection triggers pandas' SettingWithCopyWarning.
comp_data = comp_data[comp_data['CanQualifyTiers'] == True].copy()
comp_data.sample(5).T

In [None]:
# Number of remaining competition rows for each tag ID.
rows_by_tag = comp_data.groupby(['TagId'])
rows_by_tag.agg({'TagId' : 'count'})

In [None]:
# Parse EnabledDate into datetimes once, store the parsed values back, and
# derive the calendar year from the same parsed series.
enabled_on = pd.to_datetime(comp_data['EnabledDate'])
comp_data['EnabledDate'] = enabled_on
comp_data['Year'] = enabled_on.dt.year
comp_data.head().T

In [None]:
import seaborn as sns
sns.set()

# Count competitions per year and per data-type tag: one row per year and
# one column per tag.
#
# Tag IDs are mapped to labels explicitly rather than by column position.
# The label order matches the ascending TagId order that the pivot produces
# (14101, 14102, 14104), so the resulting columns are unchanged.
tag_labels = {14101: 'Tabular', 14102: 'Image', 14104: 'Text'}
yearwise = (
    pd.pivot_table(comp_data, index = 'Year', columns = 'TagId', values = 'CompetitionId', aggfunc = 'count')
    # Guarantee all three tag columns, in a fixed order, even when a tag has
    # no competitions left after filtering (positional renaming would raise
    # a length-mismatch error in that case).
    .reindex(columns = list(tag_labels))
    .rename(columns = tag_labels)
    # Year/tag combinations without any competition count as zero.
    .fillna(0)
    # Drop the 'TagId' columns-axis name and turn Year back into a regular
    # column, giving a flat frame: Year, Tabular, Image, Text.
    .rename_axis(columns = None)
    .reset_index()
)
yearwise