In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the SLOCCount data for all source files it found in the tree.
# Generated using 'sloccount --details .' in the root of the Firefox source tree.

df_sloc = pd.read_table('../data/raw/sloccount.txt', header=None, names=["lines", "language", "directory", "path"])
df_sloc.head()

Unnamed: 0,lines,language,directory,path
0,189,python,accessible,accessible/xpcom/AccEventGen.py
1,14,cpp,accessible,accessible/other/Platform.cpp
2,5,cpp,accessible,accessible/other/AccessibleWrap.cpp
3,103,cpp,accessible,accessible/html/HTMLListAccessible.cpp
4,7,cpp,accessible,accessible/html/HTMLCanvasAccessible.cpp


In [3]:
# Load the paths that Mercurial is tracking.
# Generated using 'hg files' in the root of the Firefox source tree.

df_hg= pd.read_table('../data/raw/hgfilepaths.txt', header=None, names=["path"], squeeze=True)
df_hg.head()

0              .arcconfig
1           .cargo/config
2        .cargo/config.in
3           .clang-format
4    .clang-format-ignore
Name: path, dtype: object

In [4]:
# Load paths that are under Mercurial but provided by third parties.
# Copied from 'tools/rewriting/ThirdPartyPaths.txt'.

df_vendored = pd.read_table('../data/raw/ThirdPartyPaths.txt', header=None, names=["path"], squeeze=True)
df_vendored.head()

0                               build/pymake
1            browser/components/newtab/logs/
2    browser/components/newtab/node_modules/
3          browser/components/newtab/vendor/
4       browser/components/translation/cld2/
Name: path, dtype: object

In [5]:
# Find the files under Mercurial that we wrote.
excluded = set()
[excluded.add(f) for f in df_hg for v in df_vendored if f.startswith(v)];
df_excluded = pd.Series(list(excluded))

In [6]:
df_excluded.head()

0    third_party/python/gyp/test/mac/gyptest-framew...
1    security/nss/lib/libpkix/pkix_pl_nss/pki/pkix_...
2    testing/web-platform/tests/tools/third_party/p...
3            third_party/rust/png/.cargo-checksum.json
4             intl/icu/source/data/misc/dayPeriods.txt
dtype: object

In [7]:
df_firstparty = df_hg[~df_hg.isin(df_excluded)]
print("Total files", df_hg.size)
print("Excluded files", df_excluded.size)
print("First party files", df_firstparty.size)

Total files 283798
Excluded files 56100
First party files 227698


In [8]:
# Grab just the Python files that are first-party files.
df_pyfiles = df_sloc[df_sloc.language == "python"]
df_py = df_pyfiles[df_pyfiles["path"].isin(df_firstparty)]
df_py

Unnamed: 0,lines,language,directory,path
0,189,python,accessible,accessible/xpcom/AccEventGen.py
372,24,python,browser,browser/app/macversion.py
373,14,python,browser,browser/components/payments/server.py
374,27,python,browser,browser/components/search/test/marionette/test...
375,506,python,browser,browser/components/migration/tests/marionette/...
...,...,...,...,...
41989,385,python,xpcom,xpcom/reflect/xptinfo/xptcodegen.py
41990,26,python,xpcom,xpcom/reflect/xptcall/md/win32/preprocess.py
41991,188,python,xpcom,xpcom/build/Services.py
41992,668,python,xpcom,xpcom/base/ErrorList.py


In [9]:
print("Total lines in tree:", df_py.lines.sum())

Total lines in tree: 336443


In [10]:
top_dirs = df_py.groupby("directory")

In [11]:
pd.concat([top_dirs.size(), top_dirs.sum()], axis=1).rename(columns={0: "filecount"})

Unnamed: 0_level_0,filecount,lines
directory,Unnamed: 1_level_1,Unnamed: 2_level_1
accessible,1,189
browser,7,641
build,27,3124
config,30,2157
devtools,4,152
dom,116,26578
gfx,6,829
intl,2,248
ipc,19,7753
js,108,11671


In [12]:
# Grab interesting subdirectories.

def summarize_path(pth):
    df = df_py[df_py.path.str.startswith(pth)]
    return {"path": pth, "files": len(df), "lines": df["lines"].sum()}

paths = [
    # Build
    "python/mozbuild",
    "python/mozbuild/mozbuild",
    "python/mozbuild/mozpack",
    "dom/bindings",
    # Testing
    "testing/web-platform",
    "testing/mozharness",
    "testing/mozbase",
    "testing/mochitest",
    "testing/talos",
    "layout/tools/reftest",
]

pd.DataFrame(summarize_path(p) for p in paths)

Unnamed: 0,path,files,lines
0,python/mozbuild,206,52757
1,python/mozbuild/mozbuild,173,44465
2,python/mozbuild/mozpack,30,8119
3,dom/bindings,84,24508
4,testing/web-platform,903,70378
5,testing/mozharness,314,28152
6,testing/mozbase,243,23899
7,testing/mochitest,13,5548
8,testing/talos,38,6358
9,layout/tools/reftest,10,2094
