# Languages, Files, LOCs and LOComments, Functions

In [1]:
# import the pandas data analysis library and refer to it as pd
import pandas as pd

# number of sketches in each group, and in both groups combined
n_sketches_created = 14_542
n_sketches_hearted = 14_196
n_sketches_total = sum((n_sketches_created, n_sketches_hearted))

## General analysis

Analysis of which languages and file types are used, how many files each sketch has on average, and what differences can be seen between popular sketches and new ones.

### Created

In [2]:
# load the per-language counts for the created sketches, index them by language
# and order the rows from the most files to the fewest
df_created = (
    pd.read_csv("../analysis/cloc_created_skip_unique.csv")
    .set_index('language')
    .sort_values(by='files', ascending=False)
)
df_created

Unnamed: 0_level_0,files,blank,comment,code
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SUM,28367,232606,196620,2353688
JavaScript,12860,174290,175887,1109659
HTML,10241,12166,453,126201
Arduino Sketch,4572,42917,19439,309369
CSS,402,601,209,5737
Text,112,1368,0,599420
GLSL,83,1123,486,2993
SVG,37,2,5,824
JSON,25,7,0,26973
CSV,21,0,0,171449


In [3]:
# average number of files per created sketch (the 'SUM' row holds the count over all languages)
df_created.loc['SUM', 'files'] / n_sketches_created

1.950694539953239

In [4]:
# total files that are not js, html or pde
# rows are selected by label rather than by position, so the result does not
# depend on how the table is sorted; 'SUM' is the aggregate row, not a language
total = df_created['files'].drop(['SUM', 'JavaScript', 'HTML', 'Arduino Sketch']).sum()
print(total)

# avg other files per sketch
total/n_sketches_created

694


0.04772383441067254

In [5]:
# total files that are js, html or pde
# rows are selected by label rather than by position, so the result does not
# depend on how the table is sorted
total = df_created.loc[['JavaScript', 'HTML', 'Arduino Sketch'], 'files'].sum()
print(total)

# avg js, html and pde files per sketch
total/n_sketches_created

27673


1.9029707055425664

### Hearted

In [6]:
# load the per-language counts for the hearted sketches, index them by language
# and order the rows from the most files to the fewest
df_hearted = (
    pd.read_csv("../analysis/cloc_hearted_skip_unique.csv")
    .set_index('language')
    .sort_values(by='files', ascending=False)
)
df_hearted

Unnamed: 0_level_0,files,blank,comment,code
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SUM,28213,410405,327733,3022307
JavaScript,12220,235742,254824,1530422
HTML,8873,10793,681,68436
Arduino Sketch,6295,133002,70553,785111
GLSL,197,2023,758,6006
CSS,178,266,80,3272
Text,154,27068,0,290111
SVG,123,2,70,8367
JSON,62,21,0,86908
Java,53,1486,767,7955


In [7]:
# average number of files per hearted sketch (the 'SUM' row holds the count over all languages)
df_hearted.loc['SUM', 'files'] / n_sketches_hearted

1.9873908143138912

In [8]:
# total files that are not js, html or pde
# rows are selected by label rather than by position, so the result does not
# depend on how the table is sorted; 'SUM' is the aggregate row, not a language
total = df_hearted['files'].drop(['SUM', 'JavaScript', 'HTML', 'Arduino Sketch']).sum()
print(total)

# avg other files per sketch
total/n_sketches_hearted

825


0.058114961961115805

In [9]:
# total files that are js, html or pde
# rows are selected by label rather than by position, so the result does not
# depend on how the table is sorted
total = df_hearted.loc[['JavaScript', 'HTML', 'Arduino Sketch'], 'files'].sum()
print(total)

# avg js, html and pde files per sketch
total/n_sketches_hearted

27388


1.9292758523527753

### Total

In [10]:
# add the created and hearted counts row by row; a language present in only one
# of the two tables is counted as 0 in the other (fill_value=0)
df_total = (
    df_created
    .add(df_hearted, fill_value=0)
    .sort_values(by='files', ascending=False)
)
df_total

Unnamed: 0_level_0,files,blank,comment,code
language,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SUM,56580.0,643011.0,524353.0,5375995.0
JavaScript,25080.0,410032.0,430711.0,2640081.0
HTML,19114.0,22959.0,1134.0,194637.0
Arduino Sketch,10867.0,175919.0,89992.0,1094480.0
CSS,580.0,867.0,289.0,9009.0
GLSL,280.0,3146.0,1244.0,8999.0
Text,266.0,28436.0,0.0,889531.0
SVG,160.0,4.0,75.0,9191.0
JSON,87.0,28.0,0.0,113881.0
CSV,67.0,0.0,0.0,407122.0


In [11]:
# average number of files per sketch over both groups (the 'SUM' row holds the count over all languages)
df_total.loc['SUM', 'files'] / n_sketches_total

1.9688217690862273

In [12]:
# total files that are not js, html or pde
# rows are selected by label rather than by position, so the result does not
# depend on how the table is sorted; 'SUM' is the aggregate row, not a language
total = df_total['files'].drop(['SUM', 'JavaScript', 'HTML', 'Arduino Sketch']).sum()
print(total)

# avg other files per sketch
total/n_sketches_total

1519.0


0.05285684459600529

In [13]:
# total files that are js, html or pde
# rows are selected by label rather than by position, so the result does not
# depend on how the table is sorted
total = df_total.loc[['JavaScript', 'HTML', 'Arduino Sketch'], 'files'].sum()
print(total)

# avg js, html and pde files per sketch
total/n_sketches_total

55061.0


1.915964924490222

The vast majority of sketches don't really use external media (svg), means of information (json, csv, xml) or styling (css, glsl -- a shading language).
On average, each sketch has 0.05 files of these types, compared to the 1.9 files of type js, pde or html that each sketch has.

## Per language

### Javascript

In [14]:
# load the JavaScript statistics (one row per group of sketches)
df_javascript = pd.read_csv(filepath_or_buffer="../analysis/cloc_analysis_JavaScript.csv")

# display the table keyed by group; df_javascript itself keeps 'Group' as a regular column
df_javascript.set_index(keys='Group')

Unnamed: 0_level_0,total files,JavaScript files,% JavaScript files,avg JavaScript files per sketch,total loc JavaScript,avg loc per file,total comments JavaScript,avg comments per file
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
created,28367,12860,45.33,0.884335,1109659,86.29,175887,13.68
hearted,28213,12220,43.31,0.860806,1530422,125.24,254824,20.85
total,56580,25080,44.33,0.872712,2640081,105.27,430711,17.17


### Arduino Sketch (a.k.a. pde, i.e. Processing)

In [15]:
# load the Arduino Sketch (pde) statistics (one row per group of sketches)
df_processing = pd.read_csv(filepath_or_buffer="../analysis/cloc_analysis_Arduino Sketch.csv")

# display the table keyed by group; df_processing itself keeps 'Group' as a regular column
df_processing.set_index(keys='Group')

Unnamed: 0_level_0,total files,Arduino Sketch files,% Arduino Sketch files,avg Arduino Sketch files per sketch,total loc Arduino Sketch,avg loc per file,total comments Arduino Sketch,avg comments per file
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
created,28367,4572,16.12,0.3144,309369,67.67,19439,4.25
hearted,28213,6295,22.31,0.443435,785111,124.72,70553,11.21
total,56580,10867,19.21,0.37814,1094480,100.72,89992,8.28


### HTML

In [16]:
# load the HTML statistics (one row per group of sketches)
df_html = pd.read_csv(filepath_or_buffer="../analysis/cloc_analysis_HTML.csv")

# display the table keyed by group; df_html itself keeps 'Group' as a regular column
df_html.set_index(keys='Group')

Unnamed: 0_level_0,total files,HTML files,% HTML files,avg HTML files per sketch,total loc HTML,avg loc per file,total comments HTML,avg comments per file
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
created,28367,10241,36.1,0.704236,126201,12.32,453,0.04
hearted,28213,8873,31.45,0.625035,68436,7.71,681,0.08
total,56580,19114,33.78,0.665112,194637,10.18,1134,0.06
